diff --git a/protocol/chainlib/consumer_ws_subscription_manager_test.go b/protocol/chainlib/consumer_ws_subscription_manager_test.go index 4683eac50e..552aa78cdc 100644 --- a/protocol/chainlib/consumer_ws_subscription_manager_test.go +++ b/protocol/chainlib/consumer_ws_subscription_manager_test.go @@ -722,10 +722,9 @@ func TestConsumerWSSubscriptionManager(t *testing.T) { func CreateConsumerSessionManager(chainID, apiInterface, consumerPublicAddress string) *lavasession.ConsumerSessionManager { rand.InitRandomSeed() - baseLatency := common.AverageWorldLatency / 2 // we want performance to be half our timeout or better return lavasession.NewConsumerSessionManager( &lavasession.RPCEndpoint{NetworkAddress: "stub", ChainID: chainID, ApiInterface: apiInterface, TLSEnabled: false, HealthCheckPath: "/", Geolocation: 0}, - provideroptimizer.NewProviderOptimizer(provideroptimizer.STRATEGY_BALANCED, 0, baseLatency, 1, nil, "dontcare"), + provideroptimizer.NewProviderOptimizer(provideroptimizer.StrategyBalanced, 0, 1, nil, "dontcare"), nil, nil, consumerPublicAddress, lavasession.NewActiveSubscriptionProvidersStorage(), ) diff --git a/protocol/integration/protocol_test.go b/protocol/integration/protocol_test.go index c1a50554a6..8d564a5fa2 100644 --- a/protocol/integration/protocol_test.go +++ b/protocol/integration/protocol_test.go @@ -232,8 +232,7 @@ func createRpcConsumer(t *testing.T, ctx context.Context, rpcConsumerOptions rpc consumerStateTracker := &mockConsumerStateTracker{} finalizationConsensus := finalizationconsensus.NewFinalizationConsensus(rpcEndpoint.ChainID) _, averageBlockTime, _, _ := chainParser.ChainBlockStats() - baseLatency := common.AverageWorldLatency / 2 - optimizer := provideroptimizer.NewProviderOptimizer(provideroptimizer.STRATEGY_BALANCED, averageBlockTime, baseLatency, 2, nil, "dontcare") + optimizer := provideroptimizer.NewProviderOptimizer(provideroptimizer.StrategyBalanced, averageBlockTime, 2, nil, "dontcare") consumerSessionManager := lavasession.NewConsumerSessionManager(rpcEndpoint, optimizer, nil, nil, "test", lavasession.NewActiveSubscriptionProvidersStorage()) consumerSessionManager.UpdateAllProviders(rpcConsumerOptions.epoch, rpcConsumerOptions.pairingList) diff --git a/protocol/lavaprotocol/request_builder.go b/protocol/lavaprotocol/request_builder.go index aa96ee8730..194d04aba8 100644 --- a/protocol/lavaprotocol/request_builder.go +++ b/protocol/lavaprotocol/request_builder.go @@ -74,9 +74,6 @@ func ConstructRelaySession(lavaChainID string, relayRequestData *pairingtypes.Re copiedQOS := copyQoSServiceReport(singleConsumerSession.QoSInfo.LastQoSReport) copiedExcellenceQOS := copyQoSServiceReport(singleConsumerSession.QoSInfo.LastExcellenceQoSReportRaw) // copy raw report for the node - // validate and fix QoS excellence report before sending it to the node - copiedExcellenceQOS.ValidateAndFixQoSExcellence() - return &pairingtypes.RelaySession{ SpecId: chainID, ContentHash: sigs.HashMsg(relayRequestData.GetContentHashData()), diff --git a/protocol/lavasession/consumer_session_manager.go b/protocol/lavasession/consumer_session_manager.go index c1e179edde..1552041bd3 100644 --- a/protocol/lavasession/consumer_session_manager.go +++ b/protocol/lavasession/consumer_session_manager.go @@ -563,15 +563,15 @@ func (csm *ConsumerSessionManager) GetSessions(ctx context.Context, cuNeededForS sessionInfo.QoSSummeryResult = consumerSession.getQosComputedResultOrZero() sessions[providerAddress] = sessionInfo - qosReport, rawQosReport := 
csm.providerOptimizer.GetExcellenceQoSReportForProvider(providerAddress) + qosReport, _ := csm.providerOptimizer.GetExcellenceQoSReportForProvider(providerAddress) if csm.rpcEndpoint.Geolocation != uint64(endpoint.endpoint.Geolocation) { // rawQosReport is used only when building the relay payment message to be used to update // the provider's reputation on-chain. If the consumer and provider don't share geolocation // (consumer geo: csm.rpcEndpoint.Geolocation, provider geo: endpoint.endpoint.Geolocation) // we don't want to update the reputation by it, so we null the rawQosReport - rawQosReport = nil + qosReport = nil } - consumerSession.SetUsageForSession(cuNeededForSession, qosReport, rawQosReport, usedProviders, routerKey) + consumerSession.SetUsageForSession(cuNeededForSession, qosReport, usedProviders, routerKey) // We successfully added provider, we should ignore it if we need to fetch new tempIgnoredProviders.providers[providerAddress] = struct{}{} if len(sessions) == wantedSession { @@ -641,7 +641,7 @@ func (csm *ConsumerSessionManager) getValidProviderAddresses(ignoredProvidersLis } } var providers []string - if stateful == common.CONSISTENCY_SELECT_ALL_PROVIDERS && csm.providerOptimizer.Strategy() != provideroptimizer.STRATEGY_COST { + if stateful == common.CONSISTENCY_SELECT_ALL_PROVIDERS && csm.providerOptimizer.Strategy() != provideroptimizer.StrategyCost { providers = csm.getTopTenProvidersForStatefulCalls(validAddresses, ignoredProvidersList) } else { providers, _ = csm.providerOptimizer.ChooseProvider(validAddresses, ignoredProvidersList, cu, requestedBlock) @@ -1048,7 +1048,11 @@ func (csm *ConsumerSessionManager) OnSessionDone( consumerSession.LatestBlock = latestServicedBlock // update latest serviced block // calculate QoS consumerSession.CalculateQoS(currentLatency, expectedLatency, expectedBH-latestServicedBlock, numOfProviders, int64(providersCount)) - go csm.providerOptimizer.AppendRelayData(consumerSession.Parent.PublicLavaAddress, currentLatency, isHangingApi, specComputeUnits, uint64(latestServicedBlock)) + if !isHangingApi { + // append relay data only for non hanging apis + go csm.providerOptimizer.AppendRelayData(consumerSession.Parent.PublicLavaAddress, currentLatency, specComputeUnits, uint64(latestServicedBlock)) + } + csm.updateMetricsManager(consumerSession, currentLatency, !isHangingApi) // apply latency only for non hanging apis return nil } diff --git a/protocol/lavasession/consumer_session_manager_test.go b/protocol/lavasession/consumer_session_manager_test.go index c92ead202d..abb34c224e 100644 --- a/protocol/lavasession/consumer_session_manager_test.go +++ b/protocol/lavasession/consumer_session_manager_test.go @@ -161,8 +161,7 @@ func TestEndpointSortingFlow(t *testing.T) { func CreateConsumerSessionManager() *ConsumerSessionManager { rand.InitRandomSeed() - baseLatency := common.AverageWorldLatency / 2 // we want performance to be half our timeout or better - return NewConsumerSessionManager(&RPCEndpoint{"stub", "stub", "stub", false, "/", 0}, provideroptimizer.NewProviderOptimizer(provideroptimizer.STRATEGY_BALANCED, 0, baseLatency, 1, nil, "dontcare"), nil, nil, "lava@test", NewActiveSubscriptionProvidersStorage()) + return NewConsumerSessionManager(&RPCEndpoint{"stub", "stub", "stub", false, "/", 0}, provideroptimizer.NewProviderOptimizer(provideroptimizer.StrategyBalanced, 0, 1, nil, "dontcare"), nil, nil, "lava@test", NewActiveSubscriptionProvidersStorage()) } func TestMain(m *testing.M) { diff --git a/protocol/lavasession/consumer_types.go 
b/protocol/lavasession/consumer_types.go index 22c6bed45c..7fc97a0fb5 100644 --- a/protocol/lavasession/consumer_types.go +++ b/protocol/lavasession/consumer_types.go @@ -72,9 +72,9 @@ type ConsumerSessionsMap map[string]*SessionInfo type ProviderOptimizer interface { AppendProbeRelayData(providerAddress string, latency time.Duration, success bool) AppendRelayFailure(providerAddress string) - AppendRelayData(providerAddress string, latency time.Duration, isHangingApi bool, cu, syncBlock uint64) + AppendRelayData(providerAddress string, latency time.Duration, cu, syncBlock uint64) ChooseProvider(allAddresses []string, ignoredProviders map[string]struct{}, cu uint64, requestedBlock int64) (addresses []string, tier int) - GetExcellenceQoSReportForProvider(string) (*pairingtypes.QualityOfServiceReport, *pairingtypes.QualityOfServiceReport) + GetExcellenceQoSReportForProvider(string) (*pairingtypes.QualityOfServiceReport, time.Time) Strategy() provideroptimizer.Strategy UpdateWeights(map[string]int64, uint64) } diff --git a/protocol/lavasession/single_consumer_session.go b/protocol/lavasession/single_consumer_session.go index 221cd70f20..54f5a6f41b 100644 --- a/protocol/lavasession/single_consumer_session.go +++ b/protocol/lavasession/single_consumer_session.go @@ -104,13 +104,12 @@ func (cs *SingleConsumerSession) CalculateQoS(latency, expectedLatency time.Dura } } -func (scs *SingleConsumerSession) SetUsageForSession(cuNeededForSession uint64, qoSExcellenceReport *pairingtypes.QualityOfServiceReport, rawQoSExcellenceReport *pairingtypes.QualityOfServiceReport, usedProviders UsedProvidersInf, routerKey RouterKey) error { +func (scs *SingleConsumerSession) SetUsageForSession(cuNeededForSession uint64, qoSExcellenceReport *pairingtypes.QualityOfServiceReport, usedProviders UsedProvidersInf, routerKey RouterKey) error { scs.LatestRelayCu = cuNeededForSession // set latestRelayCu scs.RelayNum += RelayNumberIncrement // increase relayNum if scs.RelayNum > 1 { // we only set excellence for sessions with more than one successful relays, this guarantees data within the epoch exists scs.QoSInfo.LastExcellenceQoSReport = qoSExcellenceReport - scs.QoSInfo.LastExcellenceQoSReportRaw = rawQoSExcellenceReport } scs.usedProviders = usedProviders scs.routerKey = routerKey diff --git a/protocol/provideroptimizer/provider_optimizer.go b/protocol/provideroptimizer/provider_optimizer.go index 2e34963c34..77f64311c7 100644 --- a/protocol/provideroptimizer/provider_optimizer.go +++ b/protocol/provideroptimizer/provider_optimizer.go @@ -1,34 +1,37 @@ package provideroptimizer import ( - "math" + "fmt" "strings" "sync" "time" + stdMath "math" + + "cosmossdk.io/math" sdk "github.com/cosmos/cosmos-sdk/types" "github.com/dgraph-io/ristretto/v2" - "github.com/lavanet/lava/v4/protocol/common" "github.com/lavanet/lava/v4/protocol/metrics" "github.com/lavanet/lava/v4/utils" "github.com/lavanet/lava/v4/utils/lavaslices" "github.com/lavanet/lava/v4/utils/rand" "github.com/lavanet/lava/v4/utils/score" pairingtypes "github.com/lavanet/lava/v4/x/pairing/types" + spectypes "github.com/lavanet/lava/v4/x/spec/types" "gonum.org/v1/gonum/mathext" ) +// The provider optimizer is a mechanism within the consumer that is responsible for choosing +// the optimal provider for the consumer. +// The choice depends on the provider's QoS excellence metrics: latency, sync and availability. +// Providers are picked by selection tiers that take into account their stake amount and QoS +// excellence score. 
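// Editor's note — an illustrative, self-contained usage sketch (not part of this change set) showing
// how the reworked API introduced in this diff is meant to be driven: NewProviderOptimizer no longer
// takes a baseWorldLatency argument and AppendRelayData no longer takes isHangingApi. The chain id,
// provider addresses, stakes, latencies and block numbers below are made-up placeholders.
//
//	package main
//
//	import (
//		"fmt"
//		"time"
//
//		"github.com/lavanet/lava/v4/protocol/provideroptimizer"
//		spectypes "github.com/lavanet/lava/v4/x/spec/types"
//	)
//
//	func main() {
//		// balanced strategy, 10s average block time, one concurrent provider wanted, no QoS metrics client
//		opt := provideroptimizer.NewProviderOptimizer(provideroptimizer.StrategyBalanced, 10*time.Second, 1, nil, "LAV1")
//		// stake weights feed the selection weighter used inside each tier
//		opt.UpdateWeights(map[string]int64{"lava@provider1": 100, "lava@provider2": 900}, 1)
//		// relay samples update the decaying weighted averages for latency, sync and availability
//		opt.AppendRelayData("lava@provider1", 40*time.Millisecond, 10, 5000)  // fast, synced to block 5000
//		opt.AppendRelayData("lava@provider2", 900*time.Millisecond, 10, 4990) // slow and lagging behind
//		picked, tier := opt.ChooseProvider([]string{"lava@provider1", "lava@provider2"}, nil, 10, spectypes.LATEST_BLOCK)
//		fmt.Println(picked, tier) // over many calls provider1 should dominate tier 0
//	}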
+ const ( - CacheMaxCost = 20000 // each item cost would be 1 - CacheNumCounters = 20000 // expect 2000 items - INITIAL_DATA_STALENESS = 24 - HALF_LIFE_TIME = time.Hour - MAX_HALF_TIME = 3 * time.Hour - PROBE_UPDATE_WEIGHT = 0.25 - RELAY_UPDATE_WEIGHT = 1 - DEFAULT_EXPLORATION_CHANCE = 0.1 - COST_EXPLORATION_CHANCE = 0.01 - WANTED_PRECISION = int64(8) + CacheMaxCost = 20000 // each item cost would be 1 + CacheNumCounters = 20000 // expect 2000 items + DefaultExplorationChance = 0.1 + CostExplorationChance = 0.01 ) var ( @@ -53,46 +56,91 @@ type cacheInf interface { type consumerOptimizerQoSClientInf interface { UpdatePairingListStake(stakeMap map[string]int64, chainId string, epoch uint64) } + type ProviderOptimizer struct { strategy Strategy providersStorage cacheInf providerRelayStats *ristretto.Cache[string, any] // used to decide on the half time of the decay averageBlockTime time.Duration - baseWorldLatency time.Duration wantedNumProvidersInConcurrency uint latestSyncData ConcurrentBlockStore - selectionWeighter SelectionWeighter - OptimizerNumTiers int + selectionWeighter SelectionWeighter // weights are the providers stake + OptimizerNumTiers int // number of tiers to use + OptimizerMinTierEntries int // minimum number of entries in a tier to be considered for selection consumerOptimizerQoSClient consumerOptimizerQoSClientInf chainId string } +// The exploration mechanism makes the optimizer return providers that were not talking +// to the consumer for a long time (a couple of seconds). This allows better distribution +// of paired providers by avoiding returning the same best providers over and over. +// The Exploration struct holds a provider address and last QoS metrics update time (ScoreStore) type Exploration struct { address string time time.Time } type ProviderData struct { - Availability score.ScoreStore // will be used to calculate the probability of error - Latency score.ScoreStore // will be used to calculate the latency score - Sync score.ScoreStore // will be used to calculate the sync score for spectypes.LATEST_BLOCK/spectypes.NOT_APPLICABLE requests - SyncBlock uint64 // will be used to calculate the probability of block error - LatencyRaw score.ScoreStore // will be used when reporting reputation to the node (Latency = LatencyRaw / baseLatency) - SyncRaw score.ScoreStore // will be used when reporting reputation to the node (Sync = SyncRaw / baseSync) + Availability score.ScoreStorer // will be used to calculate the probability of error + Latency score.ScoreStorer // will be used to calculate the latency score + Sync score.ScoreStorer // will be used to calculate the sync score for spectypes.LATEST_BLOCK/spectypes.NOT_APPLICABLE requests + SyncBlock uint64 // will be used to calculate the probability of block error } +// Strategy defines the pairing strategy. Using different +// strategies allow users to determine the providers type they'll +// be paired with: providers with low latency, fresh sync and more. 
type Strategy int const ( - STRATEGY_BALANCED Strategy = iota - STRATEGY_LATENCY - STRATEGY_SYNC_FRESHNESS - STRATEGY_COST - STRATEGY_PRIVACY - STRATEGY_ACCURACY - STRATEGY_DISTRIBUTED + StrategyBalanced Strategy = iota + StrategyLatency // prefer low latency + StrategySyncFreshness // prefer better sync + StrategyCost // prefer low CU cost (minimize optimizer exploration) + StrategyPrivacy // prefer pairing with a single provider (not fully implemented) + StrategyAccuracy // encourage optimizer exploration (higher cost) + StrategyDistributed // prefer pairing with different providers (slightly minimize optimizer exploration) ) +func (s Strategy) String() string { + switch s { + case StrategyBalanced: + return "balanced" + case StrategyLatency: + return "latency" + case StrategySyncFreshness: + return "sync_freshness" + case StrategyCost: + return "cost" + case StrategyPrivacy: + return "privacy" + case StrategyAccuracy: + return "accuracy" + case StrategyDistributed: + return "distributed" + } + + return "" +} + +// GetStrategyFactor gets the appropriate factor to multiply the sync factor +// with according to the strategy +func (s Strategy) GetStrategyFactor() math.LegacyDec { + switch s { + case StrategyLatency: + return pairingtypes.LatencyStrategyFactor + case StrategySyncFreshness: + return pairingtypes.SyncFreshnessStrategyFactor + } + + return pairingtypes.BalancedStrategyFactor +} + +func (po *ProviderOptimizer) Strategy() Strategy { + return po.strategy +} + +// UpdateWeights update the selection weighter weights func (po *ProviderOptimizer) UpdateWeights(weights map[string]int64, epoch uint64) { po.selectionWeighter.SetWeights(weights) @@ -102,97 +150,76 @@ func (po *ProviderOptimizer) UpdateWeights(weights map[string]int64, epoch uint6 } } -func (po *ProviderOptimizer) AppendRelayFailure(providerAddress string) { - po.appendRelayData(providerAddress, 0, false, false, 0, 0, time.Now()) +// AppendRelayFailure updates a provider's QoS metrics for a failed relay +func (po *ProviderOptimizer) AppendRelayFailure(provider string) { + po.appendRelayData(provider, 0, false, 0, 0, time.Now()) } -func (po *ProviderOptimizer) AppendRelayData(providerAddress string, latency time.Duration, isHangingApi bool, cu, syncBlock uint64) { - po.appendRelayData(providerAddress, latency, isHangingApi, true, cu, syncBlock, time.Now()) +// AppendRelayData updates a provider's QoS metrics for a successful relay +func (po *ProviderOptimizer) AppendRelayData(provider string, latency time.Duration, cu, syncBlock uint64) { + po.appendRelayData(provider, latency, true, cu, syncBlock, time.Now()) } -func (po *ProviderOptimizer) appendRelayData(providerAddress string, latency time.Duration, isHangingApi, success bool, cu, syncBlock uint64, sampleTime time.Time) { +// appendRelayData gets three new QoS metrics samples and updates the provider's metrics using a decaying weighted average +func (po *ProviderOptimizer) appendRelayData(provider string, latency time.Duration, success bool, cu, syncBlock uint64, sampleTime time.Time) { latestSync, timeSync := po.updateLatestSyncData(syncBlock, sampleTime) - providerData, _ := po.getProviderData(providerAddress) - halfTime := po.calculateHalfTime(providerAddress, sampleTime) - providerData = po.updateProbeEntryAvailability(providerData, success, RELAY_UPDATE_WEIGHT, halfTime, sampleTime) + providerData, _ := po.getProviderData(provider) + halfTime := po.calculateHalfTime(provider, sampleTime) + weight := score.RelayUpdateWeight + if success { - if latency > 0 { - 
baseLatency := po.baseWorldLatency + common.BaseTimePerCU(cu)/2 - if isHangingApi { - baseLatency += po.averageBlockTime / 2 // hanging apis take longer - } - providerData = po.updateProbeEntryLatency(providerData, latency, baseLatency, RELAY_UPDATE_WEIGHT, halfTime, sampleTime, isHangingApi) - } + // on a successful relay, update all the QoS metrics + providerData = po.updateDecayingWeightedAverage(providerData, score.AvailabilityScoreType, 1, weight, halfTime, cu, sampleTime) + providerData = po.updateDecayingWeightedAverage(providerData, score.LatencyScoreType, latency.Seconds(), weight, halfTime, cu, sampleTime) + if syncBlock > providerData.SyncBlock { // do not allow providers to go back providerData.SyncBlock = syncBlock } syncLag := po.calculateSyncLag(latestSync, timeSync, providerData.SyncBlock, sampleTime) - providerData = po.updateProbeEntrySync(providerData, syncLag, po.averageBlockTime, halfTime, sampleTime, isHangingApi) + providerData = po.updateDecayingWeightedAverage(providerData, score.SyncScoreType, syncLag.Seconds(), weight, halfTime, cu, sampleTime) + } else { + // on a failed relay, update the availability metric with a failure score + providerData = po.updateDecayingWeightedAverage(providerData, score.AvailabilityScoreType, 0, weight, halfTime, cu, sampleTime) } - po.providersStorage.Set(providerAddress, providerData, 1) - po.updateRelayTime(providerAddress, sampleTime) - utils.LavaFormatTrace("relay update", + po.providersStorage.Set(provider, providerData, 1) + po.updateRelayTime(provider, sampleTime) + + utils.LavaFormatTrace("[Optimizer] relay update", utils.LogAttr("providerData", providerData), utils.LogAttr("syncBlock", syncBlock), utils.LogAttr("cu", cu), - utils.LogAttr("providerAddress", providerAddress), + utils.LogAttr("providerAddress", provider), utils.LogAttr("latency", latency), utils.LogAttr("success", success), ) } +// AppendProbeRelayData updates a provider's QoS metrics for a probe relay message func (po *ProviderOptimizer) AppendProbeRelayData(providerAddress string, latency time.Duration, success bool) { providerData, _ := po.getProviderData(providerAddress) sampleTime := time.Now() halfTime := po.calculateHalfTime(providerAddress, sampleTime) - providerData = po.updateProbeEntryAvailability(providerData, success, PROBE_UPDATE_WEIGHT, halfTime, sampleTime) - if success && latency > 0 { - // base latency for a probe is the world latency - providerData = po.updateProbeEntryLatency(providerData, latency, po.baseWorldLatency, PROBE_UPDATE_WEIGHT, halfTime, sampleTime, false) + weight := score.ProbeUpdateWeight + + if success { + // update latency only on success + providerData = po.updateDecayingWeightedAverage(providerData, score.AvailabilityScoreType, 1, weight, halfTime, 0, sampleTime) + providerData = po.updateDecayingWeightedAverage(providerData, score.LatencyScoreType, latency.Seconds(), weight, halfTime, 0, sampleTime) + } else { + providerData = po.updateDecayingWeightedAverage(providerData, score.AvailabilityScoreType, 0, weight, halfTime, 0, sampleTime) } po.providersStorage.Set(providerAddress, providerData, 1) - utils.LavaFormatTrace("probe update", + utils.LavaFormatTrace("[Optimizer] probe update", utils.LogAttr("providerAddress", providerAddress), utils.LogAttr("latency", latency), utils.LogAttr("success", success), ) } -func (po *ProviderOptimizer) calcLatencyAndSyncScores(providerData ProviderData, cu uint64, requestedBlock int64) (float64, float64) { - // latency score - latencyScoreCurrent := 
po.calculateLatencyScore(providerData, cu, requestedBlock) // smaller == better i.e less latency - - // sync score - syncScoreCurrent := float64(0) - if requestedBlock < 0 { - // means user didn't ask for a specific block and we want to give him the best - syncScoreCurrent = po.calculateSyncScore(providerData.Sync) // smaller == better i.e less sync lag - } - - return latencyScoreCurrent, syncScoreCurrent -} - -func (po *ProviderOptimizer) CalculateQoSScoresForMetrics(allAddresses []string, ignoredProviders map[string]struct{}, cu uint64, requestedBlock int64) []*metrics.OptimizerQoSReport { - selectionTier, _, providersScores := po.CalculateSelectionTiers(allAddresses, ignoredProviders, cu, requestedBlock) - reports := []*metrics.OptimizerQoSReport{} - - rawScores := selectionTier.GetRawScores() - for idx, entry := range rawScores { - qosReport := providersScores[entry.Address] - qosReport.EntryIndex = idx - reports = append(reports, qosReport) - } - - return reports -} - func (po *ProviderOptimizer) CalculateSelectionTiers(allAddresses []string, ignoredProviders map[string]struct{}, cu uint64, requestedBlock int64) (SelectionTier, Exploration, map[string]*metrics.OptimizerQoSReport) { - latencyScore := math.MaxFloat64 // smaller = better i.e less latency - syncScore := math.MaxFloat64 // smaller = better i.e less sync lag - explorationCandidate := Exploration{address: "", time: time.Now().Add(time.Hour)} selectionTier := NewSelectionTier() providerScores := make(map[string]*metrics.OptimizerQoSReport) @@ -202,36 +229,65 @@ func (po *ProviderOptimizer) CalculateSelectionTiers(allAddresses []string, igno continue } - providerData, found := po.getProviderData(providerAddress) - if !found { - utils.LavaFormatTrace("provider data was not found for address", utils.LogAttr("providerAddress", providerAddress)) + qos, lastUpdateTime := po.GetExcellenceQoSReportForProvider(providerAddress) + if qos == nil { + utils.LavaFormatWarning("[Optimizer] cannot calculate selection tiers", + fmt.Errorf("could not get QoS excellence report for provider"), + utils.LogAttr("provider", providerAddress), + ) + return NewSelectionTier(), Exploration{}, nil } - latencyScoreCurrent, syncScoreCurrent := po.calcLatencyAndSyncScores(providerData, cu, requestedBlock) - - utils.LavaFormatTrace("scores information", + utils.LavaFormatTrace("[Optimizer] scores information", utils.LogAttr("providerAddress", providerAddress), - utils.LogAttr("latencyScoreCurrent", latencyScoreCurrent), - utils.LogAttr("syncScoreCurrent", syncScoreCurrent), - utils.LogAttr("latencyScore", latencyScore), - utils.LogAttr("syncScore", syncScore), + utils.LogAttr("latencyScore", qos.Latency.String()), + utils.LogAttr("syncScore", qos.Sync.String()), + utils.LogAttr("availabilityScore", qos.Availability.String()), ) - providerScore := po.calcProviderScore(latencyScoreCurrent, syncScoreCurrent) + opts := []pairingtypes.Option{pairingtypes.WithStrategyFactor(po.strategy.GetStrategyFactor())} + if requestedBlock >= 0 { + providerData, found := po.getProviderData(providerAddress) + if !found { + utils.LavaFormatTrace("[Optimizer] could not get provider data, using default", utils.LogAttr("provider", providerAddress)) + } + // add block error probability config if the request block is positive + opts = append(opts, pairingtypes.WithBlockErrorProbability(po.CalculateProbabilityOfBlockError(requestedBlock, providerData))) + } else if requestedBlock == spectypes.EARLIEST_BLOCK { + // if the request block is earliest, we use the latest block as the
requested block + requestedBlock = spectypes.LATEST_BLOCK + } else if requestedBlock != spectypes.LATEST_BLOCK && requestedBlock != spectypes.NOT_APPLICABLE { + // if the request block is not positive but not latest/not-applicable - return an error + utils.LavaFormatWarning("[Optimizer] cannot calculate selection tiers", + fmt.Errorf("could not configure block error probability, invalid requested block (must be >0 or -1 or -2)"), + utils.LogAttr("provider", providerAddress), + utils.LogAttr("requested_block", requestedBlock), + ) + return NewSelectionTier(), Exploration{}, nil + } + score, err := qos.ComputeQoSExcellenceFloat64(opts...) + if err != nil { + utils.LavaFormatWarning("[Optimizer] cannot calculate selection tiers", err, + utils.LogAttr("provider", providerAddress), + utils.LogAttr("qos_report", qos.String()), + ) + return NewSelectionTier(), Exploration{}, nil + } + latency, sync, availability := qos.GetScoresFloat64() providerScores[providerAddress] = &metrics.OptimizerQoSReport{ ProviderAddress: providerAddress, - SyncScore: syncScoreCurrent, - AvailabilityScore: providerData.Availability.Num / providerData.Availability.Denom, - LatencyScore: latencyScoreCurrent, - GenericScore: providerScore, + SyncScore: sync, + AvailabilityScore: availability, + LatencyScore: latency, + GenericScore: score, } - selectionTier.AddScore(providerAddress, providerScore) + + selectionTier.AddScore(providerAddress, score) // check if candidate for exploration - updateTime := providerData.Latency.Time - if updateTime.Add(10*time.Second).Before(time.Now()) && updateTime.Before(explorationCandidate.time) { + if lastUpdateTime.Add(10*time.Second).Before(time.Now()) && lastUpdateTime.Before(explorationCandidate.time) { // if the provider didn't update its data for 10 seconds, it is a candidate for exploration - explorationCandidate = Exploration{address: providerAddress, time: updateTime} + explorationCandidate = Exploration{address: providerAddress, time: lastUpdateTime} } } return selectionTier, explorationCandidate, providerScores @@ -242,13 +298,13 @@ func (po *ProviderOptimizer) ChooseProvider(allAddresses []string, ignoredProvid selectionTier, explorationCandidate, _ := po.CalculateSelectionTiers(allAddresses, ignoredProviders, cu, requestedBlock) selectionTierScoresCount := selectionTier.ScoresCount() - localMinimumEntries := MinimumEntries + localMinimumEntries := po.OptimizerMinTierEntries if AutoAdjustTiers { - adjustedProvidersPerTier := int(math.Ceil(float64(selectionTierScoresCount) / float64(po.OptimizerNumTiers))) - if MinimumEntries > adjustedProvidersPerTier { + adjustedProvidersPerTier := int(stdMath.Ceil(float64(selectionTierScoresCount) / float64(po.OptimizerNumTiers))) + if localMinimumEntries > adjustedProvidersPerTier { utils.LavaFormatTrace("optimizer AutoAdjustTiers activated", utils.LogAttr("set_to_adjustedProvidersPerTier", adjustedProvidersPerTier), - utils.LogAttr("was_MinimumEntries", MinimumEntries), + utils.LogAttr("was_MinimumEntries", po.OptimizerMinTierEntries), utils.LogAttr("tiers_count_po.OptimizerNumTiers", po.OptimizerNumTiers), utils.LogAttr("selectionTierScoresCount", selectionTierScoresCount)) localMinimumEntries = adjustedProvidersPerTier @@ -277,12 +333,11 @@ func (po *ProviderOptimizer) ChooseProvider(allAddresses []string, ignoredProvid // TODO: add penalty if a provider is chosen too much selectedProvider := po.selectionWeighter.WeightedChoice(tierProviders) returnedProviders := []string{selectedProvider} - if explorationCandidate.address != "" && 
po.shouldExplore(1, selectionTierScoresCount) { + if explorationCandidate.address != "" && po.shouldExplore(1) { returnedProviders = append(returnedProviders, explorationCandidate.address) } utils.LavaFormatTrace("[Optimizer] returned providers", utils.LogAttr("providers", strings.Join(returnedProviders, ",")), - utils.LogAttr("cu", cu), utils.LogAttr("shiftedChances", shiftedChances), utils.LogAttr("tier", tier), ) @@ -290,6 +345,44 @@ func (po *ProviderOptimizer) ChooseProvider(allAddresses []string, ignoredProvid return returnedProviders, tier } +// CalculateProbabilityOfBlockError calculates the probability that a provider doesn't a specific requested +// block when the consumer asks the optimizer to fetch a provider with the specific block +func (po *ProviderOptimizer) CalculateProbabilityOfBlockError(requestedBlock int64, providerData ProviderData) sdk.Dec { + probabilityBlockError := float64(0) + // if there is no syncBlock data we assume successful relays so we don't over fit providers who were lucky to update + if requestedBlock > 0 && providerData.SyncBlock < uint64(requestedBlock) && providerData.SyncBlock > 0 { + // requested a specific block, so calculate a probability of provider having that block + averageBlockTime := po.averageBlockTime.Seconds() + blockDistanceRequired := uint64(requestedBlock) - providerData.SyncBlock + if blockDistanceRequired > 0 { + timeSinceSyncReceived := time.Since(providerData.Sync.GetLastUpdateTime()).Seconds() + eventRate := timeSinceSyncReceived / averageBlockTime // a new block every average block time, numerator is time passed, gamma=rt + // probValueAfterRepetitions(k,lambda) calculates the probability for k events or less meaning p(x<=k), + // an error occurs if we didn't have enough blocks, so the chance of error is p(x= po.wantedNumProvidersInConcurrency { +// shouldExplore determines whether the optimizer should continue exploring +// after finding an appropriate provider for pairing. +// The exploration mechanism makes the optimizer return providers that were not talking +// to the consumer for a long time (a couple of seconds). This allows better distribution +// of paired providers by avoiding returning the same best providers over and over. 
+// Note, the legacy distributed strategy acts as the default balanced strategy +func (po *ProviderOptimizer) shouldExplore(currentNumProviders int) bool { + if uint(currentNumProviders) >= po.wantedNumProvidersInConcurrency { return false } - explorationChance := DEFAULT_EXPLORATION_CHANCE + explorationChance := DefaultExplorationChance switch po.strategy { - case STRATEGY_LATENCY: + case StrategyLatency: return true // we want a lot of parallel tries on latency - case STRATEGY_ACCURACY: + case StrategyAccuracy: return true - case STRATEGY_COST: - explorationChance = COST_EXPLORATION_CHANCE - case STRATEGY_DISTRIBUTED: - explorationChance = DEFAULT_EXPLORATION_CHANCE * 0.25 - case STRATEGY_PRIVACY: + case StrategyCost: + explorationChance = CostExplorationChance + case StrategyDistributed: + explorationChance = DefaultExplorationChance * 0.25 + case StrategyPrivacy: return false // only one at a time } return rand.Float64() < explorationChance } -func (po *ProviderOptimizer) isBetterProviderScore(latencyScore, latencyScoreCurrent, syncScore, syncScoreCurrent float64) bool { - switch po.strategy { - case STRATEGY_PRIVACY: - // pick at random regardless of score - if rand.Intn(2) == 0 { - return true - } - return false - } - if syncScoreCurrent == 0 { - return latencyScore > latencyScoreCurrent - } - return po.calcProviderScore(latencyScore, syncScore) > po.calcProviderScore(latencyScoreCurrent, syncScoreCurrent) -} - -func (po *ProviderOptimizer) calcProviderScore(latencyScore, syncScore float64) float64 { - var latencyWeight float64 - switch po.strategy { - case STRATEGY_LATENCY: - latencyWeight = 0.7 - case STRATEGY_SYNC_FRESHNESS: - latencyWeight = 0.2 - default: - latencyWeight = 0.6 - } - return latencyScore*latencyWeight + syncScore*(1-latencyWeight) -} - -func (po *ProviderOptimizer) calculateSyncScore(syncScore score.ScoreStore) float64 { - var historicalSyncLatency time.Duration - if syncScore.Denom == 0 { - historicalSyncLatency = 0 - } else { - historicalSyncLatency = time.Duration(syncScore.Num / syncScore.Denom * float64(po.averageBlockTime)) // give it units of block time - } - return historicalSyncLatency.Seconds() -} - -func (po *ProviderOptimizer) calculateLatencyScore(providerData ProviderData, cu uint64, requestedBlock int64) float64 { - baseLatency := po.baseWorldLatency + common.BaseTimePerCU(cu)/2 // divide by two because the returned time is for timeout not for average - timeoutDuration := common.GetTimePerCu(cu) + common.AverageWorldLatency - var historicalLatency time.Duration - if providerData.Latency.Denom == 0 { - historicalLatency = baseLatency - } else { - historicalLatency = time.Duration(float64(baseLatency) * providerData.Latency.Num / providerData.Latency.Denom) - } - if historicalLatency > timeoutDuration { - // can't have a bigger latency than timeout - historicalLatency = timeoutDuration - } - probabilityBlockError := po.CalculateProbabilityOfBlockError(requestedBlock, providerData) - probabilityOfTimeout := po.CalculateProbabilityOfTimeout(providerData.Availability) - probabilityOfSuccess := (1 - probabilityBlockError) * (1 - probabilityOfTimeout) - - // base latency is how much time it would cost to an average performing provider - // timeoutDuration is the extra time we pay for a non responsive provider - // historicalLatency is how much we are paying for the processing of this provider - - // in case of block error we are paying the time cost of this provider and the time cost of the next provider on retry - costBlockError :=
historicalLatency.Seconds() + baseLatency.Seconds() - if probabilityBlockError > 0.5 { - costBlockError *= 3 // consistency improvement - } - // in case of a time out we are paying the time cost of a timeout and the time cost of the next provider on retry - costTimeout := timeoutDuration.Seconds() + baseLatency.Seconds() - // on success we are paying the time cost of this provider - costSuccess := historicalLatency.Seconds() - - utils.LavaFormatTrace("latency calculation breakdown", - utils.LogAttr("probabilityBlockError", probabilityBlockError), - utils.LogAttr("costBlockError", costBlockError), - utils.LogAttr("probabilityOfTimeout", probabilityOfTimeout), - utils.LogAttr("costTimeout", costTimeout), - utils.LogAttr("probabilityOfSuccess", probabilityOfSuccess), - utils.LogAttr("costSuccess", costSuccess), - ) - - return probabilityBlockError*costBlockError + probabilityOfTimeout*costTimeout + probabilityOfSuccess*costSuccess -} - -func (po *ProviderOptimizer) CalculateProbabilityOfTimeout(availabilityScore score.ScoreStore) float64 { - probabilityTimeout := float64(0) - if availabilityScore.Denom > 0 { // shouldn't happen since we have default values but protect just in case - mean := availabilityScore.Num / availabilityScore.Denom - // bernoulli distribution assumption means probability of '1' is the mean, success is 1 - return 1 - mean - } - return probabilityTimeout -} - -func (po *ProviderOptimizer) CalculateProbabilityOfBlockError(requestedBlock int64, providerData ProviderData) float64 { - probabilityBlockError := float64(0) - // if there is no syncBlock data we assume successful relays so we don't over fit providers who were lucky to update - if requestedBlock > 0 && providerData.SyncBlock < uint64(requestedBlock) && providerData.SyncBlock > 0 { - // requested a specific block, so calculate a probability of provider having that block - averageBlockTime := po.averageBlockTime.Seconds() - blockDistanceRequired := uint64(requestedBlock) - providerData.SyncBlock - if blockDistanceRequired > 0 { - timeSinceSyncReceived := time.Since(providerData.Sync.Time).Seconds() - eventRate := timeSinceSyncReceived / averageBlockTime // a new block every average block time, numerator is time passed, gamma=rt - // probValueAfterRepetitions(k,lambda) calculates the probability for k events or less meaning p(x<=k), - // an error occurs if we didn't have enough blocks, so the chance of error is p(x halfTime { halfTime = relaysHalfTime } - if halfTime > MAX_HALF_TIME { - halfTime = MAX_HALF_TIME + if halfTime > score.MaxHalfTime { + halfTime = score.MaxHalfTime } return halfTime } +// getRelayStatsTimeDiff returns the time passed since the provider optimizer's saved relay times median func (po *ProviderOptimizer) getRelayStatsTimeDiff(providerAddress string, sampleTime time.Time) time.Duration { times := po.getRelayStatsTimes(providerAddress) if len(times) == 0 { @@ -539,7 +541,11 @@ func (po *ProviderOptimizer) getRelayStatsTimeDiff(providerAddress string, sampl if medianTime.Before(sampleTime) { return sampleTime.Sub(medianTime) } - utils.LavaFormatWarning("did not use sample time in optimizer calculation", nil) + utils.LavaFormatWarning("did not use sample time in optimizer calculation", nil, + utils.LogAttr("median", medianTime.UTC().Unix()), + utils.LogAttr("sample", sampleTime.UTC().Unix()), + utils.LogAttr("diff", sampleTime.UTC().Unix()-medianTime.UTC().Unix()), + ) return time.Since(medianTime) } @@ -555,7 +561,7 @@ func (po *ProviderOptimizer) getRelayStatsTimes(providerAddress string) 
[]time.T return nil } -func NewProviderOptimizer(strategy Strategy, averageBlockTIme, baseWorldLatency time.Duration, wantedNumProvidersInConcurrency uint, consumerOptimizerQoSClientInf consumerOptimizerQoSClientInf, chainId string) *ProviderOptimizer { +func NewProviderOptimizer(strategy Strategy, averageBlockTIme time.Duration, wantedNumProvidersInConcurrency uint, consumerOptimizerQoSClient consumerOptimizerQoSClientInf, chainId string) *ProviderOptimizer { cache, err := ristretto.NewCache(&ristretto.Config[string, any]{NumCounters: CacheNumCounters, MaxCost: CacheMaxCost, BufferItems: 64, IgnoreInternalCost: true}) if err != nil { utils.LavaFormatFatal("failed setting up cache for queries", err) @@ -564,7 +570,7 @@ func NewProviderOptimizer(strategy Strategy, averageBlockTIme, baseWorldLatency if err != nil { utils.LavaFormatFatal("failed setting up cache for queries", err) } - if strategy == STRATEGY_PRIVACY { + if strategy == StrategyPrivacy { // overwrite wantedNumProvidersInConcurrency = 1 } @@ -572,78 +578,76 @@ func NewProviderOptimizer(strategy Strategy, averageBlockTIme, baseWorldLatency strategy: strategy, providersStorage: cache, averageBlockTime: averageBlockTIme, - baseWorldLatency: baseWorldLatency, providerRelayStats: relayCache, wantedNumProvidersInConcurrency: wantedNumProvidersInConcurrency, selectionWeighter: NewSelectionWeighter(), OptimizerNumTiers: OptimizerNumTiers, - consumerOptimizerQoSClient: consumerOptimizerQoSClientInf, + OptimizerMinTierEntries: MinimumEntries, + consumerOptimizerQoSClient: consumerOptimizerQoSClient, chainId: chainId, } } -// calculate the probability a random variable with a poisson distribution -// poisson distribution calculates the probability of K events, in this case the probability enough blocks pass and the request will be accessible in the block - -func CumulativeProbabilityFunctionForPoissonDist(k_events uint64, lambda float64) float64 { - // calculate cumulative probability of observing k events (having k or more events): - // GammaIncReg is the lower incomplete gamma function GammaIncReg(a,x) = (1/ Γ(a)) \int_0^x e^{-t} t^{a-1} dt - // the CPF for k events (less than equal k) is the regularized upper incomplete gamma function - // so to get the CPF we need to return 1 - prob - argument := float64(k_events + 1) - if argument <= 0 || lambda < 0 { - utils.LavaFormatFatal("invalid function arguments", nil, utils.Attribute{Key: "argument", Value: argument}, utils.Attribute{Key: "lambda", Value: lambda}) +func (po *ProviderOptimizer) GetExcellenceQoSReportForProvider(providerAddress string) (report *pairingtypes.QualityOfServiceReport, lastUpdateTime time.Time) { + providerData, found := po.getProviderData(providerAddress) + if !found { + utils.LavaFormatWarning("provider data not found, using default", nil, utils.LogAttr("address", providerAddress)) } - prob := mathext.GammaIncReg(argument, lambda) - return 1 - prob -} -func pertrubWithNormalGaussian(orig, percentage float64) float64 { - perturb := rand.NormFloat64() * percentage * orig - return orig + perturb -} + latency, err := providerData.Latency.Resolve() + if err != nil { + utils.LavaFormatError("could not resolve latency score", err, utils.LogAttr("address", providerAddress)) + return nil, time.Time{} + } + if latency > score.WorstLatencyScore { + latency = score.WorstLatencyScore + } -func (po *ProviderOptimizer) GetExcellenceQoSReportForProvider(providerAddress string) (qosReport *pairingtypes.QualityOfServiceReport, rawQosReport *pairingtypes.QualityOfServiceReport) { - 
providerData, found := po.getProviderData(providerAddress) - if !found { - return nil, nil + sync, err := providerData.Sync.Resolve() + if err != nil { + utils.LavaFormatError("could not resolve sync score", err, utils.LogAttr("address", providerAddress)) + return nil, time.Time{} } - precision := WANTED_PRECISION - latencyScore := turnFloatToDec(providerData.Latency.Num/providerData.Latency.Denom, precision) - syncScore := turnFloatToDec(providerData.Sync.Num/providerData.Sync.Denom, precision) - // if our sync score is un initialized due to lack of providers - if syncScore.IsZero() { - syncScore = sdk.OneDec() + if sync == 0 { + // if our sync score is uninitialized due to lack of providers + // note, we basically penalize perfect providers, but assigning the sync score to 1 + // is making it 1ms, which is a very low value that doesn't harm the provider's score + // too much + sync = 1 + } else if sync > score.WorstSyncScore { + sync = score.WorstSyncScore } - availabilityScore := turnFloatToDec(providerData.Availability.Num/providerData.Availability.Denom, precision) - ret := &pairingtypes.QualityOfServiceReport{ - Latency: latencyScore, - Availability: availabilityScore, - Sync: syncScore, + + availability, err := providerData.Availability.Resolve() + if err != nil { + utils.LavaFormatError("could not resolve availability score", err, utils.LogAttr("address", providerAddress)) + return nil, time.Time{} } - latencyScoreRaw := turnFloatToDec(providerData.LatencyRaw.Num/providerData.LatencyRaw.Denom, precision) - syncScoreRaw := turnFloatToDec(providerData.SyncRaw.Num/providerData.SyncRaw.Denom, precision) - rawQosReport = &pairingtypes.QualityOfServiceReport{ - Latency: latencyScoreRaw, - Availability: availabilityScore, - Sync: syncScoreRaw, + report = &pairingtypes.QualityOfServiceReport{ + Latency: score.ConvertToDec(latency), + Availability: score.ConvertToDec(availability), + Sync: score.ConvertToDec(sync), } - utils.LavaFormatTrace("QoS Excellence for provider", + utils.LavaFormatTrace("[Optimizer] QoS Excellence for provider", utils.LogAttr("address", providerAddress), - utils.LogAttr("Report", ret), - utils.LogAttr("raw_report", rawQosReport), + utils.LogAttr("report", report), ) - return ret, rawQosReport + return report, providerData.Latency.GetLastUpdateTime() } -func turnFloatToDec(floatNum float64, precision int64) sdk.Dec { - integerNum := int64(math.Round(floatNum * math.Pow(10, float64(precision)))) - return sdk.NewDecWithPrec(integerNum, precision) -} +func (po *ProviderOptimizer) CalculateQoSScoresForMetrics(allAddresses []string, ignoredProviders map[string]struct{}, cu uint64, requestedBlock int64) []*metrics.OptimizerQoSReport { + selectionTier, _, providersScores := po.CalculateSelectionTiers(allAddresses, ignoredProviders, cu, requestedBlock) + reports := []*metrics.OptimizerQoSReport{} -func (po *ProviderOptimizer) Strategy() Strategy { - return po.strategy + rawScores := selectionTier.GetRawScores() + for idx, entry := range rawScores { + qosReport := providersScores[entry.Address] + qosReport.EntryIndex = idx + reports = append(reports, qosReport) + } + + return reports } diff --git a/protocol/provideroptimizer/provider_optimizer_test.go b/protocol/provideroptimizer/provider_optimizer_test.go index 0e857ec16b..a7a78969ef 100644 --- a/protocol/provideroptimizer/provider_optimizer_test.go +++ b/protocol/provideroptimizer/provider_optimizer_test.go @@ -1,18 +1,13 @@ package provideroptimizer import ( - "context" - "fmt" - "net/http" - "net/http/httptest" "strconv" 
- "sync" "testing" "time" - "github.com/goccy/go-json" - "github.com/lavanet/lava/v4/protocol/metrics" + "cosmossdk.io/math" "github.com/lavanet/lava/v4/utils" + "github.com/lavanet/lava/v4/utils/lavaslices" "github.com/lavanet/lava/v4/utils/rand" spectypes "github.com/lavanet/lava/v4/x/spec/types" "github.com/stretchr/testify/require" @@ -20,32 +15,12 @@ import ( const ( TEST_AVERAGE_BLOCK_TIME = 10 * time.Second - TEST_BASE_WORLD_LATENCY = 150 * time.Millisecond + TEST_BASE_WORLD_LATENCY = 10 * time.Millisecond // same as score.DefaultLatencyNum ) -type providerOptimizerSyncCache struct { - value map[interface{}]interface{} - lock sync.RWMutex -} - -func (posc *providerOptimizerSyncCache) Get(key interface{}) (interface{}, bool) { - posc.lock.RLock() - defer posc.lock.RUnlock() - ret, ok := posc.value[key] - return ret, ok -} - -func (posc *providerOptimizerSyncCache) Set(key, value interface{}, cost int64) bool { - posc.lock.Lock() - defer posc.lock.Unlock() - posc.value[key] = value - return true -} - -func setupProviderOptimizer(maxProvidersCount int) *ProviderOptimizer { +func setupProviderOptimizer(maxProvidersCount uint) *ProviderOptimizer { averageBlockTIme := TEST_AVERAGE_BLOCK_TIME - baseWorldLatency := TEST_BASE_WORLD_LATENCY - return NewProviderOptimizer(STRATEGY_BALANCED, averageBlockTIme, baseWorldLatency, uint(maxProvidersCount), nil, "dontcare") + return NewProviderOptimizer(StrategyBalanced, averageBlockTIme, maxProvidersCount, nil, "test") } type providersGenerator struct { @@ -60,63 +35,9 @@ func (pg *providersGenerator) setupProvidersForTest(count int) *providersGenerat return pg } -func TestProbabilitiesCalculations(t *testing.T) { - value := CumulativeProbabilityFunctionForPoissonDist(1, 10) - value2 := CumulativeProbabilityFunctionForPoissonDist(10, 10) - require.Greater(t, value2, value) - - playbook := []struct { - name string - blockGap uint64 - averageBlockTime time.Duration - timeHas time.Duration - expectedProbabilityHigherLimit float64 - expectedProbabilityLowerLimit float64 - }{ - { - name: "one", - blockGap: 1, - averageBlockTime: 6 * time.Second, - timeHas: 25 * time.Second, - expectedProbabilityHigherLimit: 0.3, - expectedProbabilityLowerLimit: 0, - }, - { - name: "five", - blockGap: 5, - averageBlockTime: 6 * time.Second, - timeHas: 6 * time.Second, - expectedProbabilityHigherLimit: 1, - expectedProbabilityLowerLimit: 0.7, - }, - { - name: "tight", - blockGap: 5, - averageBlockTime: 6 * time.Second, - timeHas: 30 * time.Second, - expectedProbabilityHigherLimit: 0.5, - expectedProbabilityLowerLimit: 0.4, - }, - { - name: "almost there", - blockGap: 1, - averageBlockTime: 6 * time.Second, - timeHas: 6 * time.Second, - expectedProbabilityHigherLimit: 0.4, - expectedProbabilityLowerLimit: 0.3, - }, - } - for _, tt := range playbook { - t.Run(tt.name, func(t *testing.T) { - eventRate := tt.timeHas.Seconds() / tt.averageBlockTime.Seconds() - probabilityBlockError := CumulativeProbabilityFunctionForPoissonDist(tt.blockGap-1, eventRate) - require.LessOrEqual(t, probabilityBlockError, tt.expectedProbabilityHigherLimit) - require.GreaterOrEqual(t, probabilityBlockError, tt.expectedProbabilityLowerLimit) - }) - } -} - -func TestProviderOptimizerSetGet(t *testing.T) { +// TestProviderOptimizerProviderDataSetGet tests that the providerData +// Get and Set methods work as expected +func TestProviderOptimizerProviderDataSetGet(t *testing.T) { providerOptimizer := setupProviderOptimizer(1) providersGen := (&providersGenerator{}).setupProvidersForTest(1) providerAddress 
:= providersGen.providersAddresses[0] @@ -137,44 +58,64 @@ func TestProviderOptimizerSetGet(t *testing.T) { } } -func TestProviderOptimizerBasic(t *testing.T) { +// TestProviderOptimizerBasicProbeData tests the basic provider optimizer operation +// when it is updated with probe relays. Providers with bad scores should have a worse chance +// to be picked (and vice versa). +// Scenario: +// 0. There are 10 providers, the optimizer is configured to pick a single provider +// 1. Choose between 10 identical providers -> none should be in the worst tier +// 2. Append bad probe relay data for providers 5-7 and pick providers -> should not be 6-8 +// 3. Append good probe relay data for providers 0-2 and pick providers -> should often be 0-2 +func TestProviderOptimizerBasicProbeData(t *testing.T) { providerOptimizer := setupProviderOptimizer(1) providersGen := (&providersGenerator{}).setupProvidersForTest(10) rand.InitRandomSeed() - - requestCU := uint64(10) + cu := uint64(10) requestBlock := int64(1000) - returnedProviders, tier := providerOptimizer.ChooseProvider(providersGen.providersAddresses, nil, requestCU, requestBlock) + // choose between 10 identical providers, none should be in the worst tier + returnedProviders, tier := providerOptimizer.ChooseProvider(providersGen.providersAddresses, nil, cu, requestBlock) require.Equal(t, 1, len(returnedProviders)) require.NotEqual(t, 4, tier) - // damage their chance to be selected by placing them in the worst tier - providerOptimizer.AppendProbeRelayData(providersGen.providersAddresses[5], TEST_BASE_WORLD_LATENCY*3, true) - providerOptimizer.AppendProbeRelayData(providersGen.providersAddresses[6], TEST_BASE_WORLD_LATENCY*3, true) - providerOptimizer.AppendProbeRelayData(providersGen.providersAddresses[7], TEST_BASE_WORLD_LATENCY*3, true) + + // damage providers 5-7 scores with bad latency probes relays + // they should not be selected by the optimizer and should be in the worst tier + badLatency := TEST_BASE_WORLD_LATENCY * 3 + providerOptimizer.AppendProbeRelayData(providersGen.providersAddresses[5], badLatency, true) + providerOptimizer.AppendProbeRelayData(providersGen.providersAddresses[6], badLatency, true) + providerOptimizer.AppendProbeRelayData(providersGen.providersAddresses[7], badLatency, true) time.Sleep(4 * time.Millisecond) - returnedProviders, _ = providerOptimizer.ChooseProvider(providersGen.providersAddresses, nil, requestCU, requestBlock) + returnedProviders, _ = providerOptimizer.ChooseProvider(providersGen.providersAddresses, nil, cu, requestBlock) require.Equal(t, 1, len(returnedProviders)) require.NotEqual(t, 4, tier) require.NotEqual(t, returnedProviders[0], providersGen.providersAddresses[5]) // we shouldn't pick the worst provider require.NotEqual(t, returnedProviders[0], providersGen.providersAddresses[6]) // we shouldn't pick the worst provider require.NotEqual(t, returnedProviders[0], providersGen.providersAddresses[7]) // we shouldn't pick the worst provider - // improve selection chance by placing them in the top tier - providerOptimizer.AppendProbeRelayData(providersGen.providersAddresses[0], TEST_BASE_WORLD_LATENCY/2, true) - providerOptimizer.AppendProbeRelayData(providersGen.providersAddresses[1], TEST_BASE_WORLD_LATENCY/2, true) - providerOptimizer.AppendProbeRelayData(providersGen.providersAddresses[2], TEST_BASE_WORLD_LATENCY/2, true) + + // improve providers 0-2 scores with good latency probes relays + // they should be selected by the optimizer more often and should be in the best tier + goodLatency := 
TEST_BASE_WORLD_LATENCY / 2 + providerOptimizer.AppendProbeRelayData(providersGen.providersAddresses[0], goodLatency, true) + providerOptimizer.AppendProbeRelayData(providersGen.providersAddresses[1], goodLatency, true) + providerOptimizer.AppendProbeRelayData(providersGen.providersAddresses[2], goodLatency, true) time.Sleep(4 * time.Millisecond) - results, tierResults := runChooseManyTimesAndReturnResults(t, providerOptimizer, providersGen.providersAddresses, nil, requestCU, requestBlock, 1000) - require.Greater(t, tierResults[0], 650, tierResults) // we should pick the best tier most often - // out of 10 providers, and with 3 in the top tier we should pick 0 around a third of that - require.Greater(t, results[providersGen.providersAddresses[0]], 250, results) // we should pick the best tier most often + results, tierResults := runChooseManyTimesAndReturnResults(t, providerOptimizer, providersGen.providersAddresses, nil, 1000, cu, requestBlock) + require.Greater(t, tierResults[0], 600, tierResults) // we should pick the best tier most often + + // out of 10 providers, and with 3 providers in the top tier we should pick + // tier-0 providers around a third of that + require.Greater(t, results[providersGen.providersAddresses[0]], 200, results) // we should pick the best tier most often } -func runChooseManyTimesAndReturnResults(t *testing.T, providerOptimizer *ProviderOptimizer, providers []string, ignoredProviders map[string]struct{}, requestCU uint64, requestBlock int64, times int) (map[string]int, map[int]int) { +// runChooseManyTimesAndReturnResults uses the given optimizer and providers addresses +// to pick providers times and return two results maps: +// 1. results: map of provider address to the number of times it was picked +// 2. tierResults: map of tier and the number of times a provider from the specific tier was picked +func runChooseManyTimesAndReturnResults(t *testing.T, providerOptimizer *ProviderOptimizer, providers []string, ignoredProviders map[string]struct{}, times int, cu uint64, requestBlock int64) (map[string]int, map[int]int) { tierResults := make(map[int]int) results := make(map[string]int) for i := 0; i < times; i++ { - returnedProviders, tier := providerOptimizer.ChooseProvider(providers, ignoredProviders, requestCU, requestBlock) + returnedProviders, tier := providerOptimizer.ChooseProvider(providers, ignoredProviders, cu, requestBlock) require.Equal(t, 1, len(returnedProviders)) results[returnedProviders[0]]++ tierResults[tier]++ @@ -182,50 +123,97 @@ func runChooseManyTimesAndReturnResults(t *testing.T, providerOptimizer *Provide return results, tierResults } +// TestProviderOptimizerBasicRelayData tests the basic provider optimizer operation +// when it is updated with regular relays. Providers with bad scores should have a worse chance +// to be picked (and vice versa). +// Scenario: +// 0. There are 10 providers, the optimizer is configured to pick a single provider +// 1. Choose between 10 identical providers -> none should be in the worst tier +// 2. Append bad relay data for providers 5-7 and pick providers -> should not be 6-8 +// 3. 
Append good relay data for providers 0-2 and pick providers -> should often be 0-2 func TestProviderOptimizerBasicRelayData(t *testing.T) { providerOptimizer := setupProviderOptimizer(1) providersGen := (&providersGenerator{}).setupProvidersForTest(10) rand.InitRandomSeed() - requestCU := uint64(1) + cu := uint64(1) requestBlock := int64(1000) - syncBlock := uint64(requestBlock) - providerOptimizer.AppendRelayData(providersGen.providersAddresses[5], TEST_BASE_WORLD_LATENCY*4, false, requestCU, syncBlock) - providerOptimizer.AppendRelayData(providersGen.providersAddresses[6], TEST_BASE_WORLD_LATENCY*4, false, requestCU, syncBlock) - providerOptimizer.AppendRelayData(providersGen.providersAddresses[7], TEST_BASE_WORLD_LATENCY*4, false, requestCU, syncBlock) - time.Sleep(4 * time.Millisecond) - returnedProviders, tier := providerOptimizer.ChooseProvider(providersGen.providersAddresses, nil, requestCU, requestBlock) + // choose between 10 identical providers, none should be in the worst tier + returnedProviders, tier := providerOptimizer.ChooseProvider(providersGen.providersAddresses, nil, cu, requestBlock) require.Equal(t, 1, len(returnedProviders)) - // we shouldn't pick the low tier providers - require.NotEqual(t, tier, 3) - require.NotEqual(t, returnedProviders[0], providersGen.providersAddresses[5], tier) - require.NotEqual(t, returnedProviders[0], providersGen.providersAddresses[6], tier) - require.NotEqual(t, returnedProviders[0], providersGen.providersAddresses[7], tier) - - providerOptimizer.AppendRelayData(providersGen.providersAddresses[0], TEST_BASE_WORLD_LATENCY/4, false, requestCU, syncBlock) - providerOptimizer.AppendRelayData(providersGen.providersAddresses[1], TEST_BASE_WORLD_LATENCY/4, false, requestCU, syncBlock) - providerOptimizer.AppendRelayData(providersGen.providersAddresses[2], TEST_BASE_WORLD_LATENCY/4, false, requestCU, syncBlock) + require.NotEqual(t, 4, tier) + + // damage providers 5-7 scores with bad latency relays + // they should not be selected by the optimizer and should be in the worst tier + badLatency := TEST_BASE_WORLD_LATENCY * 3 + providerOptimizer.AppendRelayData(providersGen.providersAddresses[5], badLatency, cu, syncBlock) + providerOptimizer.AppendRelayData(providersGen.providersAddresses[6], badLatency, cu, syncBlock) + providerOptimizer.AppendRelayData(providersGen.providersAddresses[7], badLatency, cu, syncBlock) time.Sleep(4 * time.Millisecond) - results, tierResults := runChooseManyTimesAndReturnResults(t, providerOptimizer, providersGen.providersAddresses, nil, requestCU, requestBlock, 1000) + returnedProviders, tier = providerOptimizer.ChooseProvider(providersGen.providersAddresses, nil, cu, requestBlock) + require.Equal(t, 1, len(returnedProviders)) + + // there's a chance that some of the worst providers will be in part of a higher tier + // because of a high minimum entries value, so filter the providers that are only in the worst tier + selectionTier, _, _ := providerOptimizer.CalculateSelectionTiers(providersGen.providersAddresses, nil, cu, requestBlock) + tier3Entries := selectionTier.GetTier(3, providerOptimizer.OptimizerNumTiers, 1) + tier2Entries := selectionTier.GetTier(2, providerOptimizer.OptimizerNumTiers, 1) + worstTierEntries := map[string]struct{}{} + for _, entry := range tier3Entries { + // verify that the worst providers are the ones with the bad latency + if entry.Address != providersGen.providersAddresses[5] && + entry.Address != providersGen.providersAddresses[6] && + entry.Address != providersGen.providersAddresses[7] { 
+ t.Fatalf("entry %s is not in the worst tier", entry.Address) + } + worstTierEntries[entry.Address] = struct{}{} + } + for _, entry := range tier2Entries { + // remove the providers that are also in tier 2 + delete(worstTierEntries, entry.Address) + } + + require.NotEqual(t, tier, 3) // we shouldn't pick the low tier providers + for address := range worstTierEntries { + require.NotEqual(t, returnedProviders[0], address) + } - require.Zero(t, results[providersGen.providersAddresses[5]]) - require.Zero(t, results[providersGen.providersAddresses[6]]) - require.Zero(t, results[providersGen.providersAddresses[7]]) + // improve providers 0-2 scores with good latency probes relays + // they should be selected by the optimizer more often and should be in the best tier + goodLatency := TEST_BASE_WORLD_LATENCY / 2 + providerOptimizer.AppendRelayData(providersGen.providersAddresses[0], goodLatency, cu, syncBlock) + providerOptimizer.AppendRelayData(providersGen.providersAddresses[1], goodLatency, cu, syncBlock) + providerOptimizer.AppendRelayData(providersGen.providersAddresses[2], goodLatency, cu, syncBlock) + time.Sleep(4 * time.Millisecond) + results, tierResults := runChooseManyTimesAndReturnResults(t, providerOptimizer, providersGen.providersAddresses, nil, 1000, cu, requestBlock) + require.Greater(t, tierResults[0], 600, tierResults) // we should pick the best tier most often - require.Greater(t, tierResults[0], 650, tierResults) // we should pick the best tier most often // out of 10 providers, and with 3 in the top tier we should pick 0 around a third of that - require.Greater(t, results[providersGen.providersAddresses[0]], 250, results) // we should pick the best tier most often + require.Greater(t, results[providersGen.providersAddresses[0]], 200, results) + + // the bad providers shouldn't have been picked even once + for address := range worstTierEntries { + require.Zero(t, results[address]) + } } -func TestProviderOptimizerAvailability(t *testing.T) { +// TestProviderOptimizerAvailabilityProbeData tests the availability update when +// the optimizer is updated with failed probe relays. Providers with bad scores should have +// a worse chance to be picked (and vice versa). +// Scenario: +// 0. There are 100 providers, the optimizer is configured to pick a single provider +// 1. Append bad probe relay data for all provider but random three +// 2. 
Pick providers and check they're picked most often +func TestProviderOptimizerAvailabilityProbeData(t *testing.T) { providerOptimizer := setupProviderOptimizer(1) providersCount := 100 + cu := uint64(1) + requestBlock := int64(1000) providersGen := (&providersGenerator{}).setupProvidersForTest(providersCount) rand.InitRandomSeed() - requestCU := uint64(10) - requestBlock := int64(1000) + // damage all the providers' scores with failed probe relays except for three random ones skipIndex := rand.Intn(providersCount - 3) providerOptimizer.OptimizerNumTiers = 33 // set many tiers so good providers can stand out in the test for i := range providersGen.providersAddresses { @@ -236,23 +224,35 @@ func TestProviderOptimizerAvailability(t *testing.T) { } providerOptimizer.AppendProbeRelayData(providersGen.providersAddresses[i], TEST_BASE_WORLD_LATENCY, false) } + + // pick providers, the three random ones should be top-tier and picked more often time.Sleep(4 * time.Millisecond) - results, tierResults := runChooseManyTimesAndReturnResults(t, providerOptimizer, providersGen.providersAddresses, nil, requestCU, requestBlock, 1000) + results, tierResults := runChooseManyTimesAndReturnResults(t, providerOptimizer, providersGen.providersAddresses, nil, 1000, cu, requestBlock) require.Greater(t, tierResults[0], 300, tierResults) // 0.42 chance for top tier due to the algorithm to rebalance chances require.Greater(t, results[providersGen.providersAddresses[skipIndex]]+results[providersGen.providersAddresses[skipIndex+1]]+results[providersGen.providersAddresses[skipIndex+2]], 275) require.InDelta(t, results[providersGen.providersAddresses[skipIndex]], results[providersGen.providersAddresses[skipIndex+1]], 50) - results, _ = runChooseManyTimesAndReturnResults(t, providerOptimizer, providersGen.providersAddresses, map[string]struct{}{providersGen.providersAddresses[skipIndex]: {}}, requestCU, requestBlock, 1000) + + // pick providers again but this time ignore one of the random providers, it shouldn't be picked + results, _ = runChooseManyTimesAndReturnResults(t, providerOptimizer, providersGen.providersAddresses, map[string]struct{}{providersGen.providersAddresses[skipIndex]: {}}, 1000, cu, requestBlock) require.Zero(t, results[providersGen.providersAddresses[skipIndex]]) } +// TestProviderOptimizerAvailabilityRelayData tests the availability update when +// the optimizer is updated with failed relays. Providers with bad scores should have +// a worse chance to be picked (and vice versa). +// Scenario: +// 0. There are 100 providers, the optimizer is configured to pick a single provider +// 1. Append failed relay data for all providers except three random ones +// 2.
Pick providers and check they're picked most often func TestProviderOptimizerAvailabilityRelayData(t *testing.T) { providerOptimizer := setupProviderOptimizer(1) providersCount := 100 + cu := uint64(10) + requestBlock := int64(1000) providersGen := (&providersGenerator{}).setupProvidersForTest(providersCount) rand.InitRandomSeed() - requestCU := uint64(10) - requestBlock := int64(1000) + // damage all the providers scores with failed probe relays but three random ones skipIndex := rand.Intn(providersCount - 3) providerOptimizer.OptimizerNumTiers = 33 // set many tiers so good providers can stand out in the test for i := range providersGen.providersAddresses { @@ -263,12 +263,16 @@ func TestProviderOptimizerAvailabilityRelayData(t *testing.T) { } providerOptimizer.AppendRelayFailure(providersGen.providersAddresses[i]) } + + // pick providers, the three random ones should be top-tier and picked more often time.Sleep(4 * time.Millisecond) - results, tierResults := runChooseManyTimesAndReturnResults(t, providerOptimizer, providersGen.providersAddresses, nil, requestCU, requestBlock, 1000) + results, tierResults := runChooseManyTimesAndReturnResults(t, providerOptimizer, providersGen.providersAddresses, nil, 1000, cu, requestBlock) require.Greater(t, tierResults[0], 300, tierResults) // 0.42 chance for top tier due to the algorithm to rebalance chances require.Greater(t, results[providersGen.providersAddresses[skipIndex]]+results[providersGen.providersAddresses[skipIndex+1]]+results[providersGen.providersAddresses[skipIndex+2]], 270) require.InDelta(t, results[providersGen.providersAddresses[skipIndex]], results[providersGen.providersAddresses[skipIndex+1]], 50) - results, _ = runChooseManyTimesAndReturnResults(t, providerOptimizer, providersGen.providersAddresses, map[string]struct{}{providersGen.providersAddresses[skipIndex]: {}}, requestCU, requestBlock, 1000) + + // pick providers again but this time ignore one of the random providers, it shouldn't be picked + results, _ = runChooseManyTimesAndReturnResults(t, providerOptimizer, providersGen.providersAddresses, map[string]struct{}{providersGen.providersAddresses[skipIndex]: {}}, 1000, cu, requestBlock) require.Zero(t, results[providersGen.providersAddresses[skipIndex]]) } @@ -277,34 +281,42 @@ func TestProviderOptimizerAvailabilityBlockError(t *testing.T) { providersCount := 10 providersGen := (&providersGenerator{}).setupProvidersForTest(providersCount) rand.InitRandomSeed() - requestCU := uint64(10) + cu := uint64(10) requestBlock := int64(1000) + syncBlock := uint64(1000) + badSyncBlock := syncBlock - 1 - syncBlock := uint64(requestBlock) + // damage all the providers scores with bad sync relays but three random ones + // the three random providers also get slightly worse latency + // bad sync means an update that doesn't have the latest requested block chosenIndex := rand.Intn(providersCount - 2) - for i := range providersGen.providersAddresses { time.Sleep(4 * time.Millisecond) if i == chosenIndex || i == chosenIndex+1 || i == chosenIndex+2 { - // give better syncBlock, worse latency by a little - providerOptimizer.AppendRelayData(providersGen.providersAddresses[i], TEST_BASE_WORLD_LATENCY+10*time.Millisecond, false, requestCU, syncBlock) + slightlyBadLatency := TEST_BASE_WORLD_LATENCY + 1*time.Millisecond + providerOptimizer.AppendRelayData(providersGen.providersAddresses[i], slightlyBadLatency, cu, syncBlock) continue } - providerOptimizer.AppendRelayData(providersGen.providersAddresses[i], TEST_BASE_WORLD_LATENCY, false, requestCU, 
syncBlock-1) // update that he doesn't have the latest requested block + providerOptimizer.AppendRelayData(providersGen.providersAddresses[i], TEST_BASE_WORLD_LATENCY, cu, badSyncBlock) } + + // make the top tier chance to be 70% time.Sleep(4 * time.Millisecond) - selectionTier, _, _ := providerOptimizer.CalculateSelectionTiers(providersGen.providersAddresses, nil, requestCU, requestBlock) + selectionTier, _, _ := providerOptimizer.CalculateSelectionTiers(providersGen.providersAddresses, nil, cu, requestBlock) tierChances := selectionTier.ShiftTierChance(OptimizerNumTiers, map[int]float64{0: ATierChance, OptimizerNumTiers - 1: LastTierChance}) require.Greater(t, tierChances[0], 0.7, tierChances) - results, tierResults := runChooseManyTimesAndReturnResults(t, providerOptimizer, providersGen.providersAddresses, nil, requestCU, requestBlock, 1000) - require.Greater(t, tierResults[0], 500, tierResults) // we should pick the best tier most often + + // pick providers, the top-tier should be picked more often (at least half the times) + results, tierResults := runChooseManyTimesAndReturnResults(t, providerOptimizer, providersGen.providersAddresses, nil, 1000, cu, requestBlock) + require.Greater(t, tierResults[0], 500, tierResults) + // out of 10 providers, and with 3 in the top tier we should pick 0 around a third of that - require.Greater(t, results[providersGen.providersAddresses[chosenIndex]], 200, results) // we should pick the best tier most often + require.Greater(t, results[providersGen.providersAddresses[chosenIndex]], 200, results) sumResults := results[providersGen.providersAddresses[chosenIndex]] + results[providersGen.providersAddresses[chosenIndex+1]] + results[providersGen.providersAddresses[chosenIndex+2]] require.Greater(t, sumResults, 500, results) // we should pick the best tier most often - // now try to get a previous block, our chosenIndex should be inferior in latency and blockError chance should be the same - results, tierResults = runChooseManyTimesAndReturnResults(t, providerOptimizer, providersGen.providersAddresses, nil, requestCU, requestBlock-1, 1000) + // now try to get a previous block, our chosenIndex should be inferior in latency and blockError chance should be the same + results, tierResults = runChooseManyTimesAndReturnResults(t, providerOptimizer, providersGen.providersAddresses, nil, 1000, cu, requestBlock-1) require.Greater(t, tierResults[0], 500, tierResults) // we should pick the best tier most often // out of 10 providers, and with 3 in the top tier we should pick 0 around a third of that require.Less(t, results[providersGen.providersAddresses[chosenIndex]], 50, results) // chosen indexes shoulnt be in the tier @@ -312,44 +324,72 @@ func TestProviderOptimizerAvailabilityBlockError(t *testing.T) { require.Less(t, sumResults, 150, results) // we should pick the best tier most often } -// TODO::PRT-1114 This needs to be fixed asap.
currently commented out as it prevents pushing unrelated code -// Also on typescript sdk -// func TestProviderOptimizerUpdatingLatency(t *testing.T) { -// providerOptimizer := setupProviderOptimizer(1) -// providersCount := 2 -// providersGen := (&providersGenerator{}).setupProvidersForTest(providersCount) -// providerAddress := providersGen.providersAddresses[0] -// requestCU := uint64(10) -// requestBlock := int64(1000) -// syncBlock := uint64(requestBlock) -// providerOptimizer.providersStorage = &providerOptimizerSyncCache{value: map[interface{}]interface{}{}} -// // in this test we are repeatedly adding better results, and latency score should improve -// for i := 0; i < 10; i++ { -// providerData, _ := providerOptimizer.getProviderData(providerAddress) -// currentLatencyScore := providerOptimizer.calculateLatencyScore(providerData, requestCU, requestBlock) -// providerOptimizer.AppendProbeRelayData(providerAddress, TEST_BASE_WORLD_LATENCY, true) -// providerData, found := providerOptimizer.getProviderData(providerAddress) -// require.True(t, found) -// newLatencyScore := providerOptimizer.calculateLatencyScore(providerData, requestCU, requestBlock) -// require.Greater(t, currentLatencyScore, newLatencyScore, i) -// } -// providerAddress = providersGen.providersAddresses[1] -// for i := 0; i < 10; i++ { -// providerData, _ := providerOptimizer.getProviderData(providerAddress) -// currentLatencyScore := providerOptimizer.calculateLatencyScore(providerData, requestCU, requestBlock) -// providerOptimizer.AppendRelayData(providerAddress, TEST_BASE_WORLD_LATENCY, false, requestCU, syncBlock) -// providerData, found := providerOptimizer.getProviderData(providerAddress) -// require.True(t, found) -// newLatencyScore := providerOptimizer.calculateLatencyScore(providerData, requestCU, requestBlock) -// require.Greater(t, currentLatencyScore, newLatencyScore, i) -// } -// } +// TestProviderOptimizerUpdatingLatency checks that repeatedly adding better results +// (with both probes and relays) makes the latency score improve +func TestProviderOptimizerUpdatingLatency(t *testing.T) { + providerOptimizer := setupProviderOptimizer(1) + providersCount := 2 + providersGen := (&providersGenerator{}).setupProvidersForTest(providersCount) + providerAddress := providersGen.providersAddresses[0] + cu := uint64(10) + requestBlock := int64(1000) + syncBlock := uint64(requestBlock) + + // add an average latency probe relay to determine average score + providerOptimizer.AppendProbeRelayData(providerAddress, TEST_BASE_WORLD_LATENCY, true) + time.Sleep(4 * time.Millisecond) + + // add good latency probe relays, score should improve + for i := 0; i < 10; i++ { + // get current score + qos, _ := providerOptimizer.GetExcellenceQoSReportForProvider(providerAddress) + require.NotNil(t, qos) + score, err := qos.ComputeQoSExcellence() + require.NoError(t, err) + + // add good latency probe + providerOptimizer.AppendProbeRelayData(providerAddress, TEST_BASE_WORLD_LATENCY/10, true) + time.Sleep(4 * time.Millisecond) + + // check score again and compare to the last score + qos, _ = providerOptimizer.GetExcellenceQoSReportForProvider(providerAddress) + require.NotNil(t, qos) + newScore, err := qos.ComputeQoSExcellence() + require.NoError(t, err) + require.True(t, newScore.LT(score), "newScore: "+newScore.String()+", score: "+score.String()) + } + + // add an average latency relay to determine average score + providerAddress = providersGen.providersAddresses[1] + 
providerOptimizer.AppendRelayData(providerAddress, TEST_BASE_WORLD_LATENCY, cu, syncBlock) + time.Sleep(4 * time.Millisecond) + + // add good latency relays, score should improve + for i := 0; i < 10; i++ { + // get current score + qos, _ := providerOptimizer.GetExcellenceQoSReportForProvider(providerAddress) + require.NotNil(t, qos) + score, err := qos.ComputeQoSExcellence() + require.NoError(t, err) + + // add good latency relay + providerOptimizer.AppendRelayData(providerAddress, TEST_BASE_WORLD_LATENCY/10, cu, syncBlock) + time.Sleep(4 * time.Millisecond) + + // check score again and compare to the last score + qos, _ = providerOptimizer.GetExcellenceQoSReportForProvider(providerAddress) + require.NotNil(t, qos) + newScore, err := qos.ComputeQoSExcellence() + require.NoError(t, err) + require.True(t, newScore.LT(score), "newScore: "+newScore.String()+", score: "+score.String()) + } +} func TestProviderOptimizerExploration(t *testing.T) { providerOptimizer := setupProviderOptimizer(2) providersCount := 10 providersGen := (&providersGenerator{}).setupProvidersForTest(providersCount) - requestCU := uint64(10) + cu := uint64(10) requestBlock := int64(1000) syncBlock := uint64(requestBlock) @@ -359,7 +399,7 @@ func TestProviderOptimizerExploration(t *testing.T) { testProvidersExploration := func(iterations int) float64 { exploration := 0.0 for i := 0; i < iterations; i++ { - returnedProviders, _ := providerOptimizer.ChooseProvider(providersGen.providersAddresses, nil, requestCU, requestBlock) + returnedProviders, _ := providerOptimizer.ChooseProvider(providersGen.providersAddresses, nil, cu, requestBlock) if len(returnedProviders) > 1 { exploration++ // check if we have a specific chosen index @@ -378,7 +418,7 @@ func TestProviderOptimizerExploration(t *testing.T) { chosenIndex = rand.Intn(providersCount - 2) // set chosen index with a value in the past so it can be selected for exploration - providerOptimizer.appendRelayData(providersGen.providersAddresses[chosenIndex], TEST_BASE_WORLD_LATENCY*2, false, true, requestCU, syncBlock, time.Now().Add(-35*time.Second)) + providerOptimizer.appendRelayData(providersGen.providersAddresses[chosenIndex], TEST_BASE_WORLD_LATENCY*2, true, cu, syncBlock, time.Now().Add(-35*time.Second)) // set a basic state for all other provider, with a recent time (so they can't be selected for exploration) for i := 0; i < 10; i++ { for index, address := range providersGen.providersAddresses { @@ -387,27 +427,27 @@ func TestProviderOptimizerExploration(t *testing.T) { continue } // set samples in the future so they are never a candidate for exploration - providerOptimizer.appendRelayData(address, TEST_BASE_WORLD_LATENCY*2, false, true, requestCU, syncBlock, time.Now().Add(1*time.Second)) + providerOptimizer.appendRelayData(address, TEST_BASE_WORLD_LATENCY*2, true, cu, syncBlock, time.Now().Add(1*time.Second)) } time.Sleep(4 * time.Millisecond) } // with a cost strategy we expect exploration at a 10% rate - providerOptimizer.strategy = STRATEGY_BALANCED // that's the default but to be explicit + providerOptimizer.strategy = StrategyBalanced // that's the default but to be explicit providerOptimizer.wantedNumProvidersInConcurrency = 2 // that's in the constructor but to be explicit iterations := 10000 exploration = testProvidersExploration(iterations) - require.Less(t, exploration, float64(1.4)*float64(iterations)*DEFAULT_EXPLORATION_CHANCE) // allow mistake buffer of 40% because of randomness - require.Greater(t, exploration, 
float64(0.6)*float64(iterations)*DEFAULT_EXPLORATION_CHANCE) // allow mistake buffer of 40% because of randomness + require.Less(t, exploration, float64(1.4)*float64(iterations)*DefaultExplorationChance) // allow mistake buffer of 40% because of randomness + require.Greater(t, exploration, float64(0.6)*float64(iterations)*DefaultExplorationChance) // allow mistake buffer of 40% because of randomness // with a cost strategy we expect exploration to happen once in 100 samples - providerOptimizer.strategy = STRATEGY_COST + providerOptimizer.strategy = StrategyCost exploration = testProvidersExploration(iterations) - require.Less(t, exploration, float64(1.4)*float64(iterations)*COST_EXPLORATION_CHANCE) // allow mistake buffer of 40% because of randomness - require.Greater(t, exploration, float64(0.6)*float64(iterations)*COST_EXPLORATION_CHANCE) // allow mistake buffer of 40% because of randomness + require.Less(t, exploration, float64(1.4)*float64(iterations)*CostExplorationChance) // allow mistake buffer of 40% because of randomness + require.Greater(t, exploration, float64(0.6)*float64(iterations)*CostExplorationChance) // allow mistake buffer of 40% because of randomness // privacy disables exploration - providerOptimizer.strategy = STRATEGY_PRIVACY + providerOptimizer.strategy = StrategyPrivacy exploration = testProvidersExploration(iterations) require.Equal(t, exploration, float64(0)) } @@ -416,7 +456,7 @@ func TestProviderOptimizerSyncScore(t *testing.T) { providerOptimizer := setupProviderOptimizer(1) providersGen := (&providersGenerator{}).setupProvidersForTest(10) rand.InitRandomSeed() - requestCU := uint64(10) + cu := uint64(10) requestBlock := spectypes.LATEST_BLOCK syncBlock := uint64(1000) @@ -428,22 +468,22 @@ func TestProviderOptimizerSyncScore(t *testing.T) { time.Sleep(4 * time.Millisecond) if i == chosenIndex { // give better syncBlock, latency is a tiny bit worse for the second check - providerOptimizer.appendRelayData(providersGen.providersAddresses[i], TEST_BASE_WORLD_LATENCY*2+1*time.Microsecond, false, true, requestCU, syncBlock+5, sampleTime) + providerOptimizer.appendRelayData(providersGen.providersAddresses[i], TEST_BASE_WORLD_LATENCY*2+1*time.Microsecond, true, cu, syncBlock+5, sampleTime) continue } - providerOptimizer.appendRelayData(providersGen.providersAddresses[i], TEST_BASE_WORLD_LATENCY*2, false, true, requestCU, syncBlock, sampleTime) // update that he doesn't have the latest requested block + providerOptimizer.appendRelayData(providersGen.providersAddresses[i], TEST_BASE_WORLD_LATENCY*2, true, cu, syncBlock, sampleTime) // update that he doesn't have the latest requested block } sampleTime = sampleTime.Add(time.Millisecond * 5) } time.Sleep(4 * time.Millisecond) - selectionTier, _, _ := providerOptimizer.CalculateSelectionTiers(providersGen.providersAddresses, nil, requestCU, requestBlock) + selectionTier, _, _ := providerOptimizer.CalculateSelectionTiers(providersGen.providersAddresses, nil, cu, requestBlock) tier0 := selectionTier.GetTier(0, 4, 3) require.Greater(t, len(tier0), 0) // shouldn't be empty // we have the best score on the top tier and it's sorted require.Equal(t, providersGen.providersAddresses[chosenIndex], tier0[0].Address) // now choose with a specific block that all providers have - selectionTier, _, _ = providerOptimizer.CalculateSelectionTiers(providersGen.providersAddresses, nil, requestCU, int64(syncBlock)) + selectionTier, _, _ = providerOptimizer.CalculateSelectionTiers(providersGen.providersAddresses, nil, cu, int64(syncBlock)) 
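A rough worked example of the trade-off this test exercises, using the score form quoted later in this diff (score = latency + sync_factor * sync_lag + ...) and the defaults mentioned in that comment (10 second average block time, 0.3 sync factor); those constants are assumptions taken from that comment, not from this hunk:

score_latest_block   = latency + 0.3 * 10 = latency + 3.0   (provider is one block behind the tip)
score_specific_block = latency + 0.3 * 0  = latency         (requested block is already served by everyone)

Once the requested block is one that every provider already has, the sync term vanishes and latency alone orders tier 0, which is why chosenIndex, with its slightly worse latency, falls out of the top spot in the check below.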
tier0 = selectionTier.GetTier(0, 4, 3) for idx := range tier0 { // sync score doesn't matter now so the tier0 is recalculated and chosenIndex has worst latency @@ -456,7 +496,7 @@ func TestProviderOptimizerStrategiesScoring(t *testing.T) { providerOptimizer := setupProviderOptimizer(1) providersCount := 10 providersGen := (&providersGenerator{}).setupProvidersForTest(providersCount) - requestCU := uint64(10) + cu := uint64(10) requestBlock := spectypes.LATEST_BLOCK syncBlock := uint64(1000) @@ -464,7 +504,7 @@ func TestProviderOptimizerStrategiesScoring(t *testing.T) { sampleTime := time.Now() for i := 0; i < 10; i++ { for _, address := range providersGen.providersAddresses { - providerOptimizer.appendRelayData(address, TEST_BASE_WORLD_LATENCY*2, false, true, requestCU, syncBlock, sampleTime) + providerOptimizer.appendRelayData(address, TEST_BASE_WORLD_LATENCY*2, true, cu, syncBlock, sampleTime) } time.Sleep(4 * time.Millisecond) } @@ -485,123 +525,85 @@ func TestProviderOptimizerStrategiesScoring(t *testing.T) { } sampleTime = time.Now() - improvedLatency := 280 * time.Millisecond + improvedLatency := TEST_BASE_WORLD_LATENCY / 2 normalLatency := TEST_BASE_WORLD_LATENCY * 2 improvedBlock := syncBlock + 1 // provider 0 gets a good latency - providerOptimizer.appendRelayData(providersGen.providersAddresses[0], improvedLatency, false, true, requestCU, syncBlock, sampleTime) + providerOptimizer.appendRelayData(providersGen.providersAddresses[0], improvedLatency, true, cu, syncBlock, sampleTime) // providers 3,4 get a regular entry - providerOptimizer.appendRelayData(providersGen.providersAddresses[3], normalLatency, false, true, requestCU, syncBlock, sampleTime) - providerOptimizer.appendRelayData(providersGen.providersAddresses[4], normalLatency, false, true, requestCU, syncBlock, sampleTime) + providerOptimizer.appendRelayData(providersGen.providersAddresses[3], normalLatency, true, cu, syncBlock, sampleTime) + providerOptimizer.appendRelayData(providersGen.providersAddresses[4], normalLatency, true, cu, syncBlock, sampleTime) // provider 1 gets a good sync - providerOptimizer.appendRelayData(providersGen.providersAddresses[1], normalLatency, false, true, requestCU, improvedBlock, sampleTime) + providerOptimizer.appendRelayData(providersGen.providersAddresses[1], normalLatency, true, cu, improvedBlock, sampleTime) sampleTime = sampleTime.Add(10 * time.Millisecond) // now repeat to modify all providers scores across sync calculation - providerOptimizer.appendRelayData(providersGen.providersAddresses[0], improvedLatency, false, true, requestCU, syncBlock, sampleTime) - providerOptimizer.appendRelayData(providersGen.providersAddresses[3], normalLatency, false, true, requestCU, syncBlock, sampleTime) - providerOptimizer.appendRelayData(providersGen.providersAddresses[4], normalLatency, false, true, requestCU, syncBlock, sampleTime) - providerOptimizer.appendRelayData(providersGen.providersAddresses[1], normalLatency, false, true, requestCU, improvedBlock, sampleTime) + providerOptimizer.appendRelayData(providersGen.providersAddresses[0], improvedLatency, true, cu, syncBlock, sampleTime) + providerOptimizer.appendRelayData(providersGen.providersAddresses[3], normalLatency, true, cu, syncBlock, sampleTime) + providerOptimizer.appendRelayData(providersGen.providersAddresses[4], normalLatency, true, cu, syncBlock, sampleTime) + providerOptimizer.appendRelayData(providersGen.providersAddresses[1], normalLatency, true, cu, improvedBlock, sampleTime) time.Sleep(4 * time.Millisecond) - 
providerOptimizer.strategy = STRATEGY_BALANCED + providerOptimizer.strategy = StrategyBalanced // a balanced strategy should pick provider 2 because of it's high availability - selectionTier, _, _ := providerOptimizer.CalculateSelectionTiers(providersGen.providersAddresses, nil, requestCU, requestBlock) + selectionTier, _, _ := providerOptimizer.CalculateSelectionTiers(providersGen.providersAddresses, nil, cu, requestBlock) tier0 := selectionTier.GetTier(0, 4, 3) require.Greater(t, len(tier0), 0) // shouldn't be empty // we have the best score on the top tier and it's sorted require.Equal(t, providersGen.providersAddresses[2], tier0[0].Address) - providerOptimizer.strategy = STRATEGY_COST + providerOptimizer.strategy = StrategyCost // with a cost strategy we expect the same as balanced - selectionTier, _, _ = providerOptimizer.CalculateSelectionTiers(providersGen.providersAddresses, nil, requestCU, requestBlock) + selectionTier, _, _ = providerOptimizer.CalculateSelectionTiers(providersGen.providersAddresses, nil, cu, requestBlock) tier0 = selectionTier.GetTier(0, 4, 3) require.Greater(t, len(tier0), 0) // shouldn't be empty // we have the best score on the top tier and it's sorted require.Equal(t, providersGen.providersAddresses[2], tier0[0].Address) - providerOptimizer.strategy = STRATEGY_LATENCY + providerOptimizer.strategy = StrategyLatency // latency strategy should pick the best latency - selectionTier, _, _ = providerOptimizer.CalculateSelectionTiers(providersGen.providersAddresses, map[string]struct{}{providersGen.providersAddresses[2]: {}}, requestCU, requestBlock) + selectionTier, _, _ = providerOptimizer.CalculateSelectionTiers(providersGen.providersAddresses, map[string]struct{}{providersGen.providersAddresses[2]: {}}, cu, requestBlock) tier0 = selectionTier.GetTier(0, 4, 3) require.Greater(t, len(tier0), 0) // shouldn't be empty require.Equal(t, providersGen.providersAddresses[0], tier0[0].Address) - providerOptimizer.strategy = STRATEGY_SYNC_FRESHNESS + providerOptimizer.strategy = StrategySyncFreshness // freshness strategy should pick the most advanced provider - selectionTier, _, _ = providerOptimizer.CalculateSelectionTiers(providersGen.providersAddresses, map[string]struct{}{providersGen.providersAddresses[2]: {}}, requestCU, requestBlock) + selectionTier, _, _ = providerOptimizer.CalculateSelectionTiers(providersGen.providersAddresses, map[string]struct{}{providersGen.providersAddresses[2]: {}}, cu, requestBlock) tier0 = selectionTier.GetTier(0, 4, 3) require.Greater(t, len(tier0), 0) // shouldn't be empty require.Equal(t, providersGen.providersAddresses[1], tier0[0].Address) // but if we request a past block, then it doesnt matter and we choose by latency: - selectionTier, _, _ = providerOptimizer.CalculateSelectionTiers(providersGen.providersAddresses, map[string]struct{}{providersGen.providersAddresses[2]: {}}, requestCU, int64(syncBlock)) + selectionTier, _, _ = providerOptimizer.CalculateSelectionTiers(providersGen.providersAddresses, map[string]struct{}{providersGen.providersAddresses[2]: {}}, cu, int64(syncBlock)) tier0 = selectionTier.GetTier(0, 4, 3) require.Greater(t, len(tier0), 0) // shouldn't be empty require.Equal(t, providersGen.providersAddresses[0], tier0[0].Address) } func TestExcellence(t *testing.T) { - floatVal := 0.25 - dec := turnFloatToDec(floatVal, 8) - floatNew, err := dec.Float64() - require.NoError(t, err) - require.Equal(t, floatVal, floatNew) - providerOptimizer := setupProviderOptimizer(1) providersCount := 5 providersGen := 
(&providersGenerator{}).setupProvidersForTest(providersCount) - requestCU := uint64(10) + cu := uint64(10) syncBlock := uint64(1000) // set a basic state for all of them sampleTime := time.Now() for i := 0; i < 10; i++ { for _, address := range providersGen.providersAddresses { - providerOptimizer.appendRelayData(address, TEST_BASE_WORLD_LATENCY*2, false, true, requestCU, syncBlock, sampleTime) + providerOptimizer.appendRelayData(address, TEST_BASE_WORLD_LATENCY*2, true, cu, syncBlock, sampleTime) } time.Sleep(4 * time.Millisecond) } - report, rawReport := providerOptimizer.GetExcellenceQoSReportForProvider(providersGen.providersAddresses[0]) + report, sampleTime1 := providerOptimizer.GetExcellenceQoSReportForProvider(providersGen.providersAddresses[0]) require.NotNil(t, report) - require.NotNil(t, rawReport) - report2, rawReport2 := providerOptimizer.GetExcellenceQoSReportForProvider(providersGen.providersAddresses[1]) + require.True(t, sampleTime.Equal(sampleTime1)) + report2, sampleTime2 := providerOptimizer.GetExcellenceQoSReportForProvider(providersGen.providersAddresses[1]) require.NotNil(t, report2) require.Equal(t, report, report2) - require.NotNil(t, rawReport2) - require.Equal(t, rawReport, rawReport2) -} - -func TestPerturbationWithNormalGaussianOnConcurrentComputation(t *testing.T) { - // Initialize random seed - rand.InitRandomSeed() - - // Number of iterations - iterations := 100000 - - // Original value and percentage - orig := 10.0 - percentage := 0.1 - - // Create slices to hold perturbed values - perturbationValues := make([]float64, iterations) - - // WaitGroup to wait for all Goroutines to finish - var wg sync.WaitGroup - - // Generate perturbed values concurrently - wg.Add(iterations) - for i := 0; i < iterations; i++ { - go func(index int) { - defer wg.Done() - perturbationValues[index] = pertrubWithNormalGaussian(orig, percentage) - }(i) - } - - // Wait for all Goroutines to finish, this used to panic before the fix and therefore we have this test - wg.Wait() - fmt.Println("Test completed successfully") + require.True(t, sampleTime.Equal(sampleTime2)) } // test low providers count 0-9 @@ -610,13 +612,13 @@ func TestProviderOptimizerProvidersCount(t *testing.T) { providerOptimizer := setupProviderOptimizer(1) providersCount := 10 providersGen := (&providersGenerator{}).setupProvidersForTest(providersCount) - requestCU := uint64(10) - requestBlock := spectypes.LATEST_BLOCK + cu := uint64(10) + requestBlock := int64(1000) syncBlock := uint64(1000) sampleTime := time.Now() for i := 0; i < 10; i++ { for _, address := range providersGen.providersAddresses { - providerOptimizer.appendRelayData(address, TEST_BASE_WORLD_LATENCY*2, false, true, requestCU, syncBlock, sampleTime) + providerOptimizer.appendRelayData(address, TEST_BASE_WORLD_LATENCY*2, true, cu, syncBlock, sampleTime) } time.Sleep(4 * time.Millisecond) } @@ -624,47 +626,20 @@ func TestProviderOptimizerProvidersCount(t *testing.T) { name string providers int }{ - { - name: "one", - providers: 1, - }, - { - name: "two", - providers: 2, - }, - { - name: "three", - providers: 3, - }, - { - name: "four", - providers: 4, - }, - { - name: "five", - providers: 5, - }, - { - name: "six", - providers: 6, - }, - { - name: "seven", - providers: 7, - }, - { - name: "eight", - providers: 8, - }, - { - name: "nine", - providers: 9, - }, + {name: "one", providers: 1}, + {name: "two", providers: 2}, + {name: "three", providers: 3}, + {name: "four", providers: 4}, + {name: "five", providers: 5}, + {name: "six", providers: 6}, + 
{name: "seven", providers: 7}, + {name: "eight", providers: 8}, + {name: "nine", providers: 9}, } for _, play := range playbook { t.Run(play.name, func(t *testing.T) { for i := 0; i < 10; i++ { - returnedProviders, _ := providerOptimizer.ChooseProvider(providersGen.providersAddresses[:play.providers], nil, requestCU, requestBlock) + returnedProviders, _ := providerOptimizer.ChooseProvider(providersGen.providersAddresses[:play.providers], nil, cu, requestBlock) require.Greater(t, len(returnedProviders), 0) } }) @@ -676,7 +651,7 @@ func TestProviderOptimizerWeights(t *testing.T) { providerOptimizer := setupProviderOptimizer(1) providersCount := 10 providersGen := (&providersGenerator{}).setupProvidersForTest(providersCount) - requestCU := uint64(10) + cu := uint64(10) requestBlock := spectypes.LATEST_BLOCK syncBlock := uint64(1000) sampleTime := time.Now() @@ -691,13 +666,13 @@ func TestProviderOptimizerWeights(t *testing.T) { improvedLatency := normalLatency - 5*time.Millisecond improvedBlock := syncBlock + 2 - providerOptimizer.UpdateWeights(weights, syncBlock) + providerOptimizer.UpdateWeights(weights, 1) for i := 0; i < 10; i++ { for idx, address := range providersGen.providersAddresses { if idx == 0 { - providerOptimizer.appendRelayData(address, normalLatency, false, true, requestCU, improvedBlock, sampleTime) + providerOptimizer.appendRelayData(address, normalLatency, true, cu, improvedBlock, sampleTime) } else { - providerOptimizer.appendRelayData(address, improvedLatency, false, true, requestCU, syncBlock, sampleTime) + providerOptimizer.appendRelayData(address, improvedLatency, true, cu, syncBlock, sampleTime) } sampleTime = sampleTime.Add(5 * time.Millisecond) time.Sleep(4 * time.Millisecond) @@ -705,19 +680,19 @@ func TestProviderOptimizerWeights(t *testing.T) { } // verify 0 has the best score - selectionTier, _, _ := providerOptimizer.CalculateSelectionTiers(providersGen.providersAddresses, nil, requestCU, requestBlock) + selectionTier, _, _ := providerOptimizer.CalculateSelectionTiers(providersGen.providersAddresses, nil, cu, requestBlock) tier0 := selectionTier.GetTier(0, 4, 3) require.Greater(t, len(tier0), 0) // shouldn't be empty require.Equal(t, providersGen.providersAddresses[0], tier0[0].Address) // if we pick by sync, provider 0 is in the top tier and should be selected very often - results, tierResults := runChooseManyTimesAndReturnResults(t, providerOptimizer, providersGen.providersAddresses, nil, requestCU, requestBlock, 1000) + results, tierResults := runChooseManyTimesAndReturnResults(t, providerOptimizer, providersGen.providersAddresses, nil, 1000, cu, requestBlock) require.Greater(t, tierResults[0], 600, tierResults) // we should pick the best tier most often // out of 10 providers, and with 3 in the top tier we should pick 0 around a third of that require.Greater(t, results[providersGen.providersAddresses[0]], 550, results) // we should pick the top provider in tier 0 most times due to weight // if we pick by latency only, provider 0 is in the worst tier and can't be selected at all - results, tierResults = runChooseManyTimesAndReturnResults(t, providerOptimizer, providersGen.providersAddresses, nil, requestCU, int64(syncBlock), 1000) + results, tierResults = runChooseManyTimesAndReturnResults(t, providerOptimizer, providersGen.providersAddresses, nil, 1000, cu, int64(syncBlock)) require.Greater(t, tierResults[0], 500, tierResults) // we should pick the best tier most often // out of 10 providers, and with 3 in the top tier we should pick 0 around a third of 
that require.Zero(t, results[providersGen.providersAddresses[0]]) @@ -725,13 +700,12 @@ func TestProviderOptimizerWeights(t *testing.T) { func TestProviderOptimizerTiers(t *testing.T) { rand.InitRandomSeed() - + cu := uint64(10) + requestBlock := int64(1000) providersCountList := []int{9, 10} for why, providersCount := range providersCountList { providerOptimizer := setupProviderOptimizer(1) providersGen := (&providersGenerator{}).setupProvidersForTest(providersCount) - requestCU := uint64(10) - requestBlock := spectypes.LATEST_BLOCK syncBlock := uint64(1000) sampleTime := time.Now() normalLatency := TEST_BASE_WORLD_LATENCY * 2 @@ -739,16 +713,16 @@ func TestProviderOptimizerTiers(t *testing.T) { for _, address := range providersGen.providersAddresses { modifierLatency := rand.Int63n(3) - 1 modifierSync := rand.Int63n(3) - 1 - providerOptimizer.appendRelayData(address, normalLatency+time.Duration(modifierLatency)*time.Millisecond, false, true, requestCU, syncBlock+uint64(modifierSync), sampleTime) + providerOptimizer.appendRelayData(address, normalLatency+time.Duration(modifierLatency)*time.Millisecond, true, cu, syncBlock+uint64(modifierSync), sampleTime) sampleTime = sampleTime.Add(5 * time.Millisecond) time.Sleep(4 * time.Millisecond) } } - selectionTier, _, _ := providerOptimizer.CalculateSelectionTiers(providersGen.providersAddresses, nil, requestCU, requestBlock) + selectionTier, _, _ := providerOptimizer.CalculateSelectionTiers(providersGen.providersAddresses, nil, cu, requestBlock) shiftedChances := selectionTier.ShiftTierChance(4, map[int]float64{0: 0.75}) require.NotZero(t, shiftedChances[3]) // if we pick by sync, provider 0 is in the top tier and should be selected very often - _, tierResults := runChooseManyTimesAndReturnResults(t, providerOptimizer, providersGen.providersAddresses, nil, requestCU, requestBlock, 1000) + _, tierResults := runChooseManyTimesAndReturnResults(t, providerOptimizer, providersGen.providersAddresses, nil, 1000, cu, requestBlock) for index := 0; index < OptimizerNumTiers; index++ { if providersCount >= 2*MinimumEntries && index == OptimizerNumTiers-1 { // skip last tier if there's insufficient providers @@ -759,53 +733,314 @@ func TestProviderOptimizerTiers(t *testing.T) { } } -func TestProviderOptimizerWithOptimizerQoSClient(t *testing.T) { +// TestProviderOptimizerChooseProvider checks that the follwing occurs: +// 0. Assume 6 providers: 2 with great score, 2 with mid score but one has a great stake, and 2 with low score (benchmark). +// We choose 2 providers in each choice. We choose many times. +// 1. ~80% of the times, the great score providers are picked (no preference between the two) +// 2. high stake mid score is picked more than 0 times and picked more than mid score with average stake +// 3. 
low score are not selected +func TestProviderOptimizerChooseProvider(t *testing.T) { rand.InitRandomSeed() + providerOptimizer := setupProviderOptimizer(1) + providersCount := 6 + providerOptimizer.OptimizerNumTiers = providersCount / 2 + providerOptimizer.OptimizerMinTierEntries = 2 // make each tier contain 2 providers + providersGen := (&providersGenerator{}).setupProvidersForTest(providersCount) + cu := uint64(10) + requestBlock := spectypes.LATEST_BLOCK + syncBlock := uint64(1000) + sampleTime := time.Now() - wg := sync.WaitGroup{} - wg.Add(1) - httpServerHandler := func(w http.ResponseWriter, r *http.Request) { - data := make([]byte, r.ContentLength) - r.Body.Read(data) - - optimizerQoSReport := &[]map[string]interface{}{} - err := json.Unmarshal(data, optimizerQoSReport) - require.NoError(t, err) - require.NotZero(t, len(*optimizerQoSReport)) - w.WriteHeader(http.StatusOK) - wg.Done() + // apply high stake for provider 2 + normalStake := int64(50000000000) + highStake := 5 * normalStake + highStakeProviderIndex := 2 + weights := map[string]int64{} + for i := 0; i < providersCount; i++ { + if i == highStakeProviderIndex { + weights[providersGen.providersAddresses[i]] = highStake + } else { + weights[providersGen.providersAddresses[i]] = normalStake + } } + providerOptimizer.UpdateWeights(weights, 1) + + // setup scores to all providers + improvedLatency := TEST_BASE_WORLD_LATENCY / 2 + normalLatency := TEST_BASE_WORLD_LATENCY * 2 + improvedBlock := syncBlock + 1 + + // provider 0 and 1 gets a good latency and good sync + providerOptimizer.appendRelayData(providersGen.providersAddresses[0], improvedLatency, true, cu, improvedBlock, sampleTime) + providerOptimizer.appendRelayData(providersGen.providersAddresses[1], improvedLatency, true, cu, improvedBlock, sampleTime) - mockHttpServer := httptest.NewServer(http.HandlerFunc(httpServerHandler)) - defer mockHttpServer.Close() + // providers 2 and 3 get a good latency only + providerOptimizer.appendRelayData(providersGen.providersAddresses[2], improvedLatency, true, cu, syncBlock, sampleTime) + providerOptimizer.appendRelayData(providersGen.providersAddresses[3], improvedLatency, true, cu, syncBlock, sampleTime) - chainId := "dontcare" + // provider 4 and 5 gets a normal latency and sync + providerOptimizer.appendRelayData(providersGen.providersAddresses[4], normalLatency, true, cu, syncBlock, sampleTime) + providerOptimizer.appendRelayData(providersGen.providersAddresses[5], normalLatency, true, cu, syncBlock, sampleTime) - consumerOptimizerQoSClient := metrics.NewConsumerOptimizerQoSClient("lava@test", mockHttpServer.URL, 1, 1*time.Second) - consumerOptimizerQoSClient.StartOptimizersQoSReportsCollecting(context.Background(), 900*time.Millisecond) + // now repeat to modify all providers scores across sync calculation + sampleTime = sampleTime.Add(10 * time.Millisecond) + time.Sleep(10 * time.Millisecond) + providerOptimizer.appendRelayData(providersGen.providersAddresses[5], normalLatency, true, cu, syncBlock, sampleTime) + providerOptimizer.appendRelayData(providersGen.providersAddresses[4], normalLatency, true, cu, syncBlock, sampleTime) + providerOptimizer.appendRelayData(providersGen.providersAddresses[3], improvedLatency, true, cu, syncBlock, sampleTime) + providerOptimizer.appendRelayData(providersGen.providersAddresses[2], improvedLatency, true, cu, syncBlock, sampleTime) + providerOptimizer.appendRelayData(providersGen.providersAddresses[1], improvedLatency, true, cu, improvedBlock, sampleTime) + 
providerOptimizer.appendRelayData(providersGen.providersAddresses[0], improvedLatency, true, cu, improvedBlock, sampleTime) + time.Sleep(4 * time.Millisecond) + + // choose many times and check results + iterations := 10000 + results, tierResults := runChooseManyTimesAndReturnResults(t, providerOptimizer, providersGen.providersAddresses, nil, iterations, cu, requestBlock) + require.InDelta(t, float64(iterations)*0.7, tierResults[0], float64(iterations)*0.1) // high score are picked 60%-80% of the times + require.InDelta(t, results[providersGen.providersAddresses[0]], + results[providersGen.providersAddresses[1]], float64(results[providersGen.providersAddresses[0]])*0.1) // no difference between high score providers (max 10% diff) + require.Greater(t, results[providersGen.providersAddresses[2]], 0) // high stake mid score provider picked at least once + require.Greater(t, results[providersGen.providersAddresses[2]], results[providersGen.providersAddresses[3]]) // high stake mid score provider picked more than normal stake mid score provider + require.Equal(t, 0, results[providersGen.providersAddresses[4]]) + require.Equal(t, 0, results[providersGen.providersAddresses[5]]) +} - providerOptimizer := NewProviderOptimizer(STRATEGY_BALANCED, TEST_AVERAGE_BLOCK_TIME, TEST_BASE_WORLD_LATENCY, 10, consumerOptimizerQoSClient, chainId) - consumerOptimizerQoSClient.RegisterOptimizer(providerOptimizer, chainId) +// TestProviderOptimizerRetriesWithReducedProvidersSet checks that when having a set of providers, the amount of +// providers doesn't matter and the choice is deterministic. The test does the following: +// 0. Assume a set of providers (great/mid/low score with high/low stake, all combinations) +// 1. Run ChooseProvider() number of times. Each iteration, the chosen provider from the +// last iteration is removed from the providers set. We check the ranking of providers stays the same. +// 2. Do step 1 many times. 
+// Expected: the ranking of providers stays the same, providers with high stake are picked more often, +// providers from the lowest tier are not picked +func TestProviderOptimizerRetriesWithReducedProvidersSet(t *testing.T) { + rand.InitRandomSeed() + providerOptimizer := setupProviderOptimizer(1) + providersCount := 6 + providersGen := (&providersGenerator{}).setupProvidersForTest(providersCount) + // create 3 tiers with 2 providers each + providerOptimizer.OptimizerNumTiers = providersCount / 2 + providerOptimizer.OptimizerMinTierEntries = 2 + + // apply high stake for providers 1, 3, 5 + normalStake := int64(50000000000) + highStake := 5 * normalStake + highStakeProviderIndexes := []int{1, 3, 5} + weights := map[string]int64{} + for i := 0; i < providersCount; i++ { + if lavaslices.Contains(highStakeProviderIndexes, i) { + weights[providersGen.providersAddresses[i]] = highStake + } else { + weights[providersGen.providersAddresses[i]] = normalStake + } + } + providerOptimizer.UpdateWeights(weights, 1) + cu := uint64(10) + requestBlock := int64(1000) syncBlock := uint64(1000) + sampleTime := time.Now() + baseLatency := TEST_BASE_WORLD_LATENCY.Seconds() + + // append relay data for each provider depending on its index in the providers array + // the latency gets worse for increasing index so we assume the best provider is the 1st + // address, after it the 2nd and so on + for i := 0; i < 50; i++ { + for j, address := range providersGen.providersAddresses { + latency := time.Duration(baseLatency * float64(2*j+1) * float64(time.Millisecond)) + providerOptimizer.appendRelayData(address, latency, true, cu, syncBlock, sampleTime) + } + sampleTime = sampleTime.Add(5 * time.Millisecond) + time.Sleep(5 * time.Millisecond) + } - providerAddr := "lava@test" + // choose many times with different sets of providers and check the ranking stays the same + // Expected: providers with high stake are picked more often, providers from the lowest tier are not picked + // Note, on the last two iterations, providers 4,5 are picked and provider 4 is picked more than provider 5 + // since there is only one tier and provider 4 has higher stake than provider 5 + for i := 0; i < providersCount; i++ { + // run and choose many times and keep a map of provider address -> number of times it was picked + iterations := 1000 + res, tierResults := runChooseManyTimesAndReturnResults(t, providerOptimizer, providersGen.providersAddresses[i:], nil, iterations, cu, requestBlock) + + switch i { + case 0: + // 6 providers, 3 tiers, last one not picked so only + // providers 0,1,2,3 are picked. tier 0: providers 0,1 + // tier 1: providers 2,3 + // provider 1,3 have higher stake and should be picked more often within their tier + require.Greater(t, tierResults[0], 550) + require.Greater(t, tierResults[0], tierResults[1]) + require.Equal(t, 4, len(res)) + require.Greater(t, res[providersGen.providersAddresses[1]], res[providersGen.providersAddresses[0]]) + require.Greater(t, res[providersGen.providersAddresses[3]], res[providersGen.providersAddresses[2]]) + case 1: + // 5 providers, 3 tiers, last one not picked so only + // providers 1,2,3 are picked. 
tier 0: providers 1,2 + // tier 1: providers 2,3,4 (2 and 4 with part) + // provider 1 has higher stake and should be picked more often within their tier + // provider 3 has higher stake than provider 4 and 4 is in tier 1 and 2 (worst tier) so + // provider 3 should be picked more often than provider 4 + require.Greater(t, tierResults[0], 550) + require.Greater(t, tierResults[0], tierResults[1]) + require.Equal(t, 4, len(res)) + require.Greater(t, res[providersGen.providersAddresses[1]], res[providersGen.providersAddresses[2]]) + require.Greater(t, res[providersGen.providersAddresses[3]], res[providersGen.providersAddresses[4]]) + case 2: + // 4 providers, 3 tiers, last one not picked so only + // providers 2,3,4 are picked. tier 0: providers 2,3 + // tier 1: providers 3,4 + // provider 3 has higher stake and should be picked more often within their tier + // provider 3 has higher stake than provider 4 and 4 is in tier 1 and 2 (worst tier) so + // provider 3 should be picked more often than provider 4 + require.Greater(t, tierResults[0], 550) + require.Greater(t, tierResults[0], tierResults[1]) + require.Equal(t, 3, len(res)) + require.Greater(t, res[providersGen.providersAddresses[3]], res[providersGen.providersAddresses[2]]) + require.Greater(t, res[providersGen.providersAddresses[3]], res[providersGen.providersAddresses[4]]) + case 3: + // 3 providers, 3 tiers, last one not picked + // minimum entries per tier is 2 and there are 1 provider per tier + // because of this, each tier > 0 will have 2 providers and not 1 + // providers 3,4,5 are picked. tier 0: providers 3 + // tier 1: providers 4,5 + // provider 5 has higher stake and should be picked more often within their tier + require.Greater(t, tierResults[0], 540) + require.Greater(t, tierResults[0], tierResults[1]) + require.Equal(t, 3, len(res)) + require.Greater(t, res[providersGen.providersAddresses[5]], res[providersGen.providersAddresses[4]]) + case 4: + // 2 providers, 2 tiers + // there are less providers than tiers, so num tiers is reduced to 2 + // providers 4,5 are picked. tier 0: providers 4 + // tier 1: providers 4,5 (4 with part=0.5, because it's dragged from tier 0) + // provider 4 is picked more often than provider 5 even though it has less stake + // because it's the only provider in tier 0 + require.Greater(t, tierResults[0], 550) + require.Greater(t, tierResults[0], tierResults[1]) + require.Equal(t, 2, len(res)) + require.Greater(t, res[providersGen.providersAddresses[4]], res[providersGen.providersAddresses[5]]) + } + } +} + +// TestProviderOptimizerChoiceSimulation checks that the overall choice mechanism acts as expected, +// For each of the following metrics: latency, sync, availability and stake we do the following: +// 0. Assume 2 providers +// 1. Append relay data for both providers with random samples. The "better" provider will have a randomized +// sample with a better range (for example, the better one gets latency of 10-30ms and the bad one gets 25-40ms) +// 2. Choose between them and verify the better one is chosen more. 
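To make the sampling scheme described above concrete, this is roughly how such per-iteration samples can be drawn; the variable names are only illustrative and the bounds mirror the example ranges quoted in the comment (the test body below uses its own, slightly different bounds):

	// better provider: uniform latency in the 10ms-30ms range
	goodLatency := time.Duration(rand.Int63n(21)+10) * time.Millisecond
	// worse provider: uniform latency in the 25ms-40ms range, overlapping the better one only slightly
	badLatency := time.Duration(rand.Int63n(16)+25) * time.Millisecond

Because the two ranges overlap, a single sample can still favor the worse provider, which is why the test aggregates many iterations before comparing how often each provider is picked.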
+func TestProviderOptimizerChoiceSimulation(t *testing.T) { + rand.InitRandomSeed() + providerOptimizer := setupProviderOptimizer(1) + providersCount := 2 + providersGen := (&providersGenerator{}).setupProvidersForTest(providersCount) + cu := uint64(10) + requestBlock := int64(1000) + syncBlock := uint64(1000) + sampleTime := time.Now() + baseLatency := TEST_BASE_WORLD_LATENCY.Seconds() + + providerOptimizer.OptimizerNumTiers = 2 + providerOptimizer.OptimizerMinTierEntries = 1 + + // initial values + p1Latency := baseLatency * float64(time.Millisecond) + p2Latency := baseLatency * float64(time.Millisecond) + p1SyncBlock := syncBlock + p2SyncBlock := syncBlock + p1Availability := true + p2Availability := true + + // append relay data for each provider depending on its index in the providers array + // the latency gets worse for increasing index so we assume the best provider is the 1st + // address, after it the 2nd and so on + for i := 0; i < 1000; i++ { + // randomize latency, provider 0 gets a better latency than provider 1 + p1Latency += float64(rand.Int63n(21)+10) * float64(time.Millisecond) // Random number between 10-30 + p2Latency += float64(rand.Int63n(11)+30) * float64(time.Millisecond) // Random number between 30-40 + + // randomize sync, provider 0 gets a better sync than provider 1 + if rand.Float64() < 0.1 { // 10% chance to increment both + p1SyncBlock++ + p2SyncBlock++ + } + if rand.Float64() < 0.05 { // 5% chance to increment only p1 + p1SyncBlock++ + } - providerOptimizer.UpdateWeights(map[string]int64{ - providerAddr: 1000000000, - }, syncBlock) + // randomize availability, provider 0 gets a better availability than provider 1 + if rand.Float64() < 0.1 { // 10% chance to false for p2 + p2Availability = false + } + if rand.Float64() < 0.05 { // 5% chance to false for both + p1Availability = false + p2Availability = false + } - requestCU := uint64(10) + providerOptimizer.appendRelayData(providersGen.providersAddresses[0], time.Duration(p1Latency), p1Availability, cu, p1SyncBlock, sampleTime) + providerOptimizer.appendRelayData(providersGen.providersAddresses[1], time.Duration(p2Latency), p2Availability, cu, p2SyncBlock, sampleTime) - normalLatency := TEST_BASE_WORLD_LATENCY * 2 - providerOptimizer.appendRelayData(providerAddr, normalLatency, false, true, requestCU, syncBlock, time.Now()) + sampleTime = sampleTime.Add(5 * time.Millisecond) + time.Sleep(5 * time.Millisecond) + } - wg.Wait() + // choose many times and check the better provider is chosen more often (provider 0) + iterations := 1000 + res, _ := runChooseManyTimesAndReturnResults(t, providerOptimizer, providersGen.providersAddresses, nil, iterations, cu, requestBlock) + require.Greater(t, res[providersGen.providersAddresses[0]], res[providersGen.providersAddresses[1]]) } -// TODO: new tests we need: -// check 3 providers, one with great stake one with great score -// retries: groups getting smaller -// no possible selections full -// do a simulation with better and worse providers, make sure it's good -// TODO: Oren - check optimizer selection with defaults (no scores for some of the providers) +// TestProviderOptimizerLatencySyncScore tests that a provider with 100ms latency and x sync block +// has the same score as a provider with 1100ms latency but x+1 sync block +// This is true since the average block time is 10sec and the default sync factor is 0.3. So +// score_good_latency = latency + sync_factor * sync_lag + ... = 0.01 + 0.3 * 10 + ... = 3.01 + ... 
+// score_good_sync = latency + sync_factor * sync_lag + ... = 3.01 + 0.3 * 0 + ... = 3.01 + ... +func TestProviderOptimizerLatencySyncScore(t *testing.T) { + rand.InitRandomSeed() + providerOptimizer := setupProviderOptimizer(1) + providersCount := 2 + providersGen := (&providersGenerator{}).setupProvidersForTest(providersCount) + cu := uint64(10) + requestBlock := spectypes.LATEST_BLOCK + syncBlock := uint64(1000) + + improvedLatency := TEST_BASE_WORLD_LATENCY + badLatency := TEST_BASE_WORLD_LATENCY + 3*time.Second // sync factor is 0.3 so add 3 seconds + + // set a basic state for all providers + sampleTime := time.Now() + for i := 0; i < 10; i++ { + for _, address := range providersGen.providersAddresses { + providerOptimizer.appendRelayData(address, TEST_BASE_WORLD_LATENCY*2, true, cu, syncBlock, sampleTime) + } + time.Sleep(4 * time.Millisecond) + } + + // provider 0 gets a good sync with bad latency + providerOptimizer.appendRelayData(providersGen.providersAddresses[0], badLatency, true, cu, syncBlock+1, sampleTime) + + // provider 1 gets a good latency with bad sync + providerOptimizer.appendRelayData(providersGen.providersAddresses[1], improvedLatency, true, cu, syncBlock, sampleTime.Add(TEST_AVERAGE_BLOCK_TIME)) + + // verify both providers have the same score + scores := []math.LegacyDec{} + for _, provider := range providersGen.providersAddresses { + qos, _ := providerOptimizer.GetExcellenceQoSReportForProvider(provider) + require.NotNil(t, qos) + score, err := qos.ComputeQoSExcellence() + require.NoError(t, err) + scores = append(scores, score) + } + require.Len(t, scores, 2) + s0, err := scores[0].Float64() + require.NoError(t, err) + s1, err := scores[1].Float64() + require.NoError(t, err) + require.InDelta(t, s0, s1, 0.01) + + // choose many times - since their scores should be the same, they should be picked in a similar amount + iterations := 1000 + res, _ := runChooseManyTimesAndReturnResults(t, providerOptimizer, providersGen.providersAddresses, nil, iterations, cu, requestBlock) + require.InDelta(t, res[providersGen.providersAddresses[0]], res[providersGen.providersAddresses[1]], float64(iterations)*0.1) +} diff --git a/protocol/provideroptimizer/selection_tier.go b/protocol/provideroptimizer/selection_tier.go index 0da0915653..74dca1bef4 100644 --- a/protocol/provideroptimizer/selection_tier.go +++ b/protocol/provideroptimizer/selection_tier.go @@ -14,7 +14,13 @@ type Entry struct { Part float64 } -// selectionTier is a utility to get a tier of addresses based on their scores +// selectionTier is a utility to categorize provider addresses based on their +// relative stakes. This mechanism ensures that providers with similar stakes +// compete for relays based on their service quality. For example, if there are +// multiple providers with low stakes but good service, they will compete more +// directly with each other than with a provider with a high stake but poor service. +// This helps prevent providers with large stakes from monopolizing relay +// services. 
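To ground the comment above, here is a minimal sketch of how the SelectionTier utility is driven, based only on the constructor and calls that appear in the tests further down in this diff; the entry names, scores and chance values are invented, and the assumption that tier 0 holds the best-ranked entries is inferred from those tests rather than stated here:

func exampleSelectionTierUsage() { // hypothetical helper, not part of this change
	st := NewSelectionTier()
	st.AddScore("providerA", 0.1)
	st.AddScore("providerB", 0.2)
	st.AddScore("providerC", 0.3)
	st.AddScore("providerD", 0.4)

	// fetch the best tier out of 2 tiers, requiring at least 2 entries per tier
	for _, entry := range st.GetTier(0, 2, 2) {
		_ = entry.Address // address placed in this tier
		_ = entry.Part    // fractional weight; a Part below 1.0 marks an entry shared with a neighboring tier
	}

	// bias selection toward the best tier, e.g. roughly a 75% chance for tier 0
	shiftedChances := st.ShiftTierChance(2, map[int]float64{0: 0.75})
	_ = shiftedChances
}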
type SelectionTier interface { AddScore(entry string, score float64) GetTier(tier int, numTiers int, minimumEntries int) []Entry @@ -146,7 +152,7 @@ func (st *SelectionTierInst) ShiftTierChance(numTiers int, initialTierChances ma shiftedTierChances[i] = chanceForDefaultTiers + averageChance*offsetFactor } } else { - if initialTierChances[i] > 0 { + if initialTierChances[i] > LastTierChance { shiftedTierChances[i] = initialTierChances[i] + averageChance*offsetFactor } } @@ -165,6 +171,10 @@ func (st *SelectionTierInst) ShiftTierChance(numTiers int, initialTierChances ma func (st *SelectionTierInst) GetTier(tier int, numTiers int, minimumEntries int) []Entry { // get the tier of scores for the given tier and number of tiers entriesLen := len(st.scores) + if entriesLen < numTiers { + utils.LavaFormatError("Number of tiers is greater than the number of scores", nil, utils.LogAttr("entriesLen", entriesLen), utils.LogAttr("numTiers", numTiers)) + return st.scores + } if entriesLen < minimumEntries || numTiers == 0 || tier >= numTiers { return st.scores } @@ -176,9 +186,18 @@ func (st *SelectionTierInst) GetTier(tier int, numTiers int, minimumEntries int) } ret := st.scores[start:end] if len(ret) >= minimumEntries { - // apply the relative parts to the first and last entries - ret[0].Part = 1 - fracStart - ret[len(ret)-1].Part = fracEnd + // First entry + ret[0].Part = fracStart + + // Middle entries + for i := 1; i < len(ret)-1; i++ { + ret[i].Part = 1.0 + } + + // Last entry + if len(ret) > 1 { + ret[len(ret)-1].Part = fracEnd + } return ret } // bring in entries from better tiers if insufficient, give them a handicap to weight @@ -195,15 +214,34 @@ func (st *SelectionTierInst) GetTier(tier int, numTiers int, minimumEntries int) return ret } +// getPositionsForTier calculates the start and end positions for a given tier, +// along with fractional adjustments for boundary entries +// outputs: first entry index for this tier, last entry index for this tier (exclusive), fractional part +// for first entry, fractional part for last entry +// Note: this function assumes that numTiers is not greater than the number of entries func getPositionsForTier(tier int, numTiers int, entriesLen int) (start int, end int, fracStart float64, fracEnd float64) { - rankStart := float64(tier) / float64(numTiers) - rankEnd := float64(tier+1) / float64(numTiers) - // Calculate the position based on the rank - startPositionF := (float64(entriesLen-1) * rankStart) - endPositionF := (float64(entriesLen-1) * rankEnd) + if numTiers <= 0 || entriesLen <= 0 { + return 0, entriesLen, 0, 0 + } + + // calculate the part of the first and last entries in the tier + tierSize := float64(entriesLen) / float64(numTiers) + fracStart = math.Ceil(tierSize*float64(tier)) - tierSize*float64(tier) + + leftover := tierSize - fracStart + // the fractional part of the leftover is fracEnd + fracEnd = leftover - math.Floor(leftover) - positionStart := int(startPositionF) - positionEnd := int(endPositionF) + 1 + if math.Abs(fracStart) < 1e-10 { + fracStart = 1.0 + } + + if math.Abs(fracEnd) < 1e-10 { + fracEnd = 1.0 + } - return positionStart, positionEnd, startPositionF - float64(positionStart), float64(positionEnd) - endPositionF + // calculate the start and end positions + start = int(math.Floor(tierSize * float64(tier))) + end = int(math.Ceil(tierSize * float64(tier+1))) + return start, end, fracStart, fracEnd } diff --git a/protocol/provideroptimizer/selection_tier_test.go b/protocol/provideroptimizer/selection_tier_test.go index 
0d4f5f97e7..5f4bc42108 100644 --- a/protocol/provideroptimizer/selection_tier_test.go +++ b/protocol/provideroptimizer/selection_tier_test.go @@ -59,13 +59,13 @@ func TestSelectionTierInst_GetTier(t *testing.T) { { tier: 1, minimumEntries: 2, - expectedTier: []string{"entry5", "entry4", "entry2"}, + expectedTier: []string{"entry4", "entry2"}, name: "tier 1, 2 entries", }, { tier: 2, minimumEntries: 2, - expectedTier: []string{"entry2", "entry3", "entry6"}, + expectedTier: []string{"entry3", "entry6"}, name: "tier 2, 2 entries", }, { @@ -139,20 +139,20 @@ func TestSelectionTierInstGetTierBig(t *testing.T) { { tier: 1, minimumEntries: 5, - expectedTierLen: 26, - name: "tier 1, 26 entries", + expectedTierLen: 25, + name: "tier 1, 25 entries", }, { tier: 2, minimumEntries: 5, - expectedTierLen: 26, - name: "tier 2, 26 entries", + expectedTierLen: 25, + name: "tier 2, 25 entries", }, { tier: 3, minimumEntries: 5, - expectedTierLen: 26, - name: "tier 3, 26 entries", + expectedTierLen: 25, + name: "tier 3, 25 entries", }, { tier: 0, @@ -311,3 +311,74 @@ func TestSelectionTierInst_SelectTierRandomly_Default(t *testing.T) { assert.InDelta(t, expectedDistribution, count, 300) } } + +// TestTierParts tests that when getting a tier, the sum of the parts of the entries +// in each tier is equal to the expected value of entries/numTiers +// note, it's assumed that the number of entries is greater or equal to the number of tiers +func TestTierParts(t *testing.T) { + templete := []struct { + name string + numTiers int + entriesLen int + expected map[int][]float64 // expected parts for each tier + }{ + {"3 tiers 6 entries", 3, 6, map[int][]float64{ + 0: {1.0, 1.0}, + 1: {1.0, 1.0}, + 2: {1.0, 1.0}, + }}, + {"3 tiers 3 entries", 3, 3, map[int][]float64{ + 0: {1.0}, + 1: {1.0}, + 2: {1.0}, + }}, + {"3 tiers 5 entries", 3, 5, map[int][]float64{ + 0: {1.0, 2.0 / 3.0}, + 1: {1.0 / 3.0, 1.0, 1.0 / 3.0}, + 2: {2.0 / 3.0, 1.0}, + }}, + {"3 tiers 4 entries", 3, 4, map[int][]float64{ + 0: {1.0, 1.0 / 3.0}, + 1: {2.0 / 3.0, 2.0 / 3.0}, + 2: {1.0 / 3.0, 1.0}, + }}, + {"4 tiers 11 entries", 4, 11, map[int][]float64{ + 0: {1.0, 1.0, 0.75}, + 1: {0.25, 1.0, 1.0, 0.5}, + 2: {0.5, 1.0, 1.0, 0.25}, + 3: {0.75, 1.0, 1.0}, + }}, + {"4 tiers 10 entries", 4, 10, map[int][]float64{ + 0: {1.0, 1.0, 0.5}, + 1: {0.5, 1.0, 1.0}, + 2: {1.0, 1.0, 0.5}, + 3: {0.5, 1.0, 1.0}, + }}, + } + + for _, play := range templete { + for tier := 0; tier < play.numTiers; tier++ { + st := NewSelectionTier() + // add entries to the selection tier + for i := 0; i < play.entriesLen; i++ { + st.AddScore("entry"+strconv.Itoa(i), 0.1) + } + + // get tier parts + partsSum := 0.0 + parts := []float64{} + entries := st.GetTier(tier, play.numTiers, 1) + for _, entry := range entries { + partsSum += entry.Part + parts = append(parts, entry.Part) + } + + for i := range parts { + require.InDelta(t, play.expected[tier][i], parts[i], 0.001, + "tier: %d, entriesLen: %d, numTiers: %d, index: %d", tier, play.entriesLen, play.numTiers, i) + } + assert.InDelta(t, float64(play.entriesLen)/float64(play.numTiers), partsSum, 0.01, + "tier: %d, entriesLen: %d, numTiers: %d", tier, play.entriesLen, play.numTiers) + } + } +} diff --git a/protocol/rpcconsumer/rpcconsumer.go b/protocol/rpcconsumer/rpcconsumer.go index c141fb97cb..88b82f82b6 100644 --- a/protocol/rpcconsumer/rpcconsumer.go +++ b/protocol/rpcconsumer/rpcconsumer.go @@ -72,7 +72,7 @@ var strategyNames = []string{ "distributed", } -var strategyFlag strategyValue = strategyValue{Strategy: 
provideroptimizer.STRATEGY_BALANCED} +var strategyFlag strategyValue = strategyValue{Strategy: provideroptimizer.StrategyBalanced} func (s *strategyValue) String() string { return strategyNames[int(s.Strategy)] @@ -368,10 +368,8 @@ func (rpcc *RPCConsumer) CreateConsumerEndpoint( var loaded bool var err error - baseLatency := common.AverageWorldLatency / 2 // we want performance to be half our timeout or better - // Create / Use existing optimizer - newOptimizer := provideroptimizer.NewProviderOptimizer(options.strategy, averageBlockTime, baseLatency, options.maxConcurrentProviders, consumerOptimizerQoSClient, chainID) + newOptimizer := provideroptimizer.NewProviderOptimizer(options.strategy, averageBlockTime, options.maxConcurrentProviders, consumerOptimizerQoSClient, chainID) optimizer, loaded, err = optimizers.LoadOrStore(chainID, newOptimizer) if err != nil { return utils.LavaFormatError("failed loading optimizer", err, utils.LogAttr("endpoint", rpcEndpoint.Key())) @@ -635,7 +633,7 @@ rpcconsumer consumer_examples/full_consumer_example.yml --cache-be "127.0.0.1:77 utils.LavaFormatInfo("cache service connected", utils.Attribute{Key: "address", Value: cacheAddr}) } } - if strategyFlag.Strategy != provideroptimizer.STRATEGY_BALANCED { + if strategyFlag.Strategy != provideroptimizer.StrategyBalanced { utils.LavaFormatInfo("Working with selection strategy: " + strategyFlag.String()) } diff --git a/protocol/rpcconsumer/rpcconsumer_server_test.go b/protocol/rpcconsumer/rpcconsumer_server_test.go index 50ab71858a..94642fb28b 100644 --- a/protocol/rpcconsumer/rpcconsumer_server_test.go +++ b/protocol/rpcconsumer/rpcconsumer_server_test.go @@ -53,8 +53,7 @@ func createRpcConsumer(t *testing.T, ctrl *gomock.Controller, ctx context.Contex finalizationConsensus := finalizationconsensus.NewFinalizationConsensus(rpcEndpoint.ChainID) _, averageBlockTime, _, _ := chainParser.ChainBlockStats() - baseLatency := common.AverageWorldLatency / 2 - optimizer := provideroptimizer.NewProviderOptimizer(provideroptimizer.STRATEGY_BALANCED, averageBlockTime, baseLatency, 2, nil, "dontcare") + optimizer := provideroptimizer.NewProviderOptimizer(provideroptimizer.StrategyBalanced, averageBlockTime, 2, nil, "dontcare") consumerSessionManager := lavasession.NewConsumerSessionManager(rpcEndpoint, optimizer, nil, nil, "test", lavasession.NewActiveSubscriptionProvidersStorage()) consumerSessionManager.UpdateAllProviders(epoch, map[uint64]*lavasession.ConsumerSessionsWithProvider{ epoch: { diff --git a/utils/score/decay_score.go b/utils/score/decay_score.go deleted file mode 100644 index 5827ebee62..0000000000 --- a/utils/score/decay_score.go +++ /dev/null @@ -1,43 +0,0 @@ -package score - -import ( - "math" - "time" -) - -type ScoreStore struct { - Num float64 // for performance i didn't use math/big rationale arithmetic - Denom float64 - Time time.Time -} - -func NewScoreStore(num, denom float64, inpTime time.Time) ScoreStore { - return ScoreStore{Num: num, Denom: denom, Time: inpTime} -} - -// CalculateTimeDecayFunctionUpdate calculates the time decayed score update between two ScoreStore entries. -// It uses a decay function with a half life of halfLife to factor in the time elapsed since the oldScore was recorded. -// Both the numerator and the denominator of the newScore are decayed by this function. -// Additionally, the newScore is factored by a weight of updateWeight. -// The function returns a new ScoreStore entry with the updated numerator, denominator, and current time. 
-// -// The mathematical equation used to calculate the update is: -// -// updatedNum = oldScore.Num*exp(-(now-oldScore.Time)/halfLife) + newScore.Num*exp(-(now-newScore.Time)/halfLife)*updateWeight -// updatedDenom = oldScore.Denom*exp(-(now-oldScore.Time)/halfLife) + newScore.Denom*exp(-(now-newScore.Time)/halfLife)*updateWeight -// -// where now is the current time. -// -// Note that the returned ScoreStore has a new Time field set to the current time. -func CalculateTimeDecayFunctionUpdate(oldScore, newScore ScoreStore, halfLife time.Duration, updateWeight float64, sampleTime time.Time) (normalizedScoreStore ScoreStore, rawScoreStore ScoreStore) { - oldDecayExponent := math.Ln2 * sampleTime.Sub(oldScore.Time).Seconds() / halfLife.Seconds() - oldDecayFactor := math.Exp(-oldDecayExponent) - newDecayExponent := math.Ln2 * sampleTime.Sub(newScore.Time).Seconds() / halfLife.Seconds() - newDecayFactor := math.Exp(-newDecayExponent) - updatedNum := oldScore.Num*oldDecayFactor + newScore.Num*newDecayFactor*updateWeight - updatedDenom := oldScore.Denom*oldDecayFactor + newScore.Denom*newDecayFactor*updateWeight - - // Raw denom = denom not divided by benchmark value (=denom of a new ScoreStore) - updatedRawDenom := oldDecayFactor + newDecayFactor*updateWeight // removed newScore.Denom from update to get raw data - return NewScoreStore(updatedNum, updatedDenom, sampleTime), NewScoreStore(updatedNum, updatedRawDenom, sampleTime) -} diff --git a/utils/score/score_config.go b/utils/score/score_config.go new file mode 100644 index 0000000000..0501421715 --- /dev/null +++ b/utils/score/score_config.go @@ -0,0 +1,108 @@ +package score + +import ( + "fmt" + "time" +) + +// Config defines a collection of parameters that can be used by ScoreStore. ScoreStore is a +// decaying weighted average object that is used to collect providers performance metrics samples. +// These are used to calculate the providers QoS excellence score, used by the provider optimizer +// when choosing providers to be paired with a consumer. +// +// Config parameters: +// 1. Weight: sample weight that multiplies the sample when updating the ScoreStore. +// 2. HalfLife: defines the half life time of the decaying exponent used in the ScoreStore. +// 3. LatencyCuFactor: an additional multiplier to latency samples that is determined by +// the amount of CU used by the relay that the provider serviced. +// +// Additional info: +// CU latency factors are used to scale down high CU latencies when updating the latency ScoreStore +// so it can be safely added to the score average without bias (for example, a high CU +// latency sample from a TX might be 10sec while a low CU latency sample from a basic query might +// be 10ms and they're both considered good response time from the provider) +// +// TODO: high latency can be due to archive requests, addons, etc. This implementation +// is only partial since it considers only the CU amount + +const ( + DefaultHalfLifeTime = time.Hour + MaxHalfTime = 3 * time.Hour + + DefaultWeight float64 = 1 + ProbeUpdateWeight float64 = 0.25 + RelayUpdateWeight float64 = 1 + + // TODO: find actual numbers from info of latencies of high/mid/low CU from "stats.lavanet.xyz". + // Do a distribution and find average factor to multiply the failure cost by. 
+ DefaultCuLatencyFactor = LowCuLatencyFactor + HighCuLatencyFactor = 0.01 // for cu > HighCuThreshold + MidCuLatencyFactor = 0.1 // for MidCuThreshold < cu < HighCuThreshold + LowCuLatencyFactor = float64(1) // for cu < MidCuThreshold + + HighCuThreshold = uint64(100) + MidCuThreshold = uint64(50) +) + +type Config struct { + Weight float64 + HalfLife time.Duration + LatencyCuFactor float64 // should only be used for latency samples +} + +var defaultConfig = Config{ + Weight: DefaultWeight, + HalfLife: DefaultHalfLifeTime, + LatencyCuFactor: DefaultCuLatencyFactor, +} + +// Validate validates the Config's fields hold valid values +func (c Config) Validate() error { + if c.Weight <= 0 { + return fmt.Errorf("invalid config: weight must be strictly positive, weight: %f", c.Weight) + } + if c.HalfLife.Seconds() <= 0 { + return fmt.Errorf("invalid config: half life time must be strictly positive, half life: %f", c.HalfLife.Seconds()) + } + if c.LatencyCuFactor <= 0 || c.LatencyCuFactor > 1 { + return fmt.Errorf("invalid config: latency cu factor must be between (0,1], latency cu factor: %f", c.LatencyCuFactor) + } + return nil +} + +// String prints a Config's fields +func (c Config) String() string { + return fmt.Sprintf("weight: %f, decay_half_life_time_sec: %f, latency_cu_factor: %f", c.Weight, c.HalfLife.Seconds(), c.LatencyCuFactor) +} + +// Option is used as a generic and elegant way to configure a new ScoreStore +type Option func(*Config) + +func WithWeight(weight float64) Option { + return func(c *Config) { + c.Weight = weight + } +} + +func WithDecayHalfLife(halfLife time.Duration) Option { + return func(c *Config) { + c.HalfLife = halfLife + } +} + +func WithLatencyCuFactor(factor float64) Option { + return func(c *Config) { + c.LatencyCuFactor = factor + } +} + +// GetLatencyFactor returns the appropriate latency factor by the CU amount +func GetLatencyFactor(cu uint64) float64 { + if cu > HighCuThreshold { + return HighCuLatencyFactor + } else if cu < MidCuThreshold { + return LowCuLatencyFactor + } + + return MidCuLatencyFactor +} diff --git a/utils/score/score_config_test.go b/utils/score/score_config_test.go new file mode 100644 index 0000000000..b5b6286326 --- /dev/null +++ b/utils/score/score_config_test.go @@ -0,0 +1,54 @@ +package score_test + +import ( + "testing" + "time" + + "github.com/lavanet/lava/v4/utils/score" + "github.com/stretchr/testify/require" +) + +func TestConfigValidation(t *testing.T) { + template := []struct { + name string + config score.Config + valid bool + }{ + {name: "valid", config: score.Config{Weight: 1, HalfLife: time.Second, LatencyCuFactor: 1}, valid: true}, + {name: "invalid weight", config: score.Config{Weight: -1, HalfLife: time.Second, LatencyCuFactor: 1}, valid: false}, + {name: "invalid half life", config: score.Config{Weight: 1, HalfLife: -time.Second, LatencyCuFactor: 1}, valid: false}, + {name: "invalid zero latency cu factor", config: score.Config{Weight: 1, HalfLife: time.Second, LatencyCuFactor: 0}, valid: false}, + {name: "invalid >1 latency cu factor", config: score.Config{Weight: 1, HalfLife: time.Second, LatencyCuFactor: 1.01}, valid: false}, + } + + for _, tt := range template { + t.Run(tt.name, func(t *testing.T) { + err := tt.config.Validate() + if tt.valid { + require.NoError(t, err) + } else { + require.Error(t, err) + } + }) + } +} + +func TestConfigModification(t *testing.T) { + config := score.Config{Weight: 1, HalfLife: time.Second, LatencyCuFactor: 1} + weight := float64(2) + halfLife := 3 * time.Second + 
latencyCuFactor := 0.5 + + opts := []score.Option{ + score.WithWeight(weight), + score.WithDecayHalfLife(halfLife), + score.WithLatencyCuFactor(latencyCuFactor), + } + for _, opt := range opts { + opt(&config) + } + + require.Equal(t, weight, config.Weight) + require.Equal(t, halfLife, config.HalfLife) + require.Equal(t, latencyCuFactor, config.LatencyCuFactor) +} diff --git a/utils/score/score_store.go b/utils/score/score_store.go new file mode 100644 index 0000000000..8ea7768253 --- /dev/null +++ b/utils/score/score_store.go @@ -0,0 +1,379 @@ +package score + +import ( + "fmt" + "math" + "time" + + "cosmossdk.io/errors" + sdk "github.com/cosmos/cosmos-sdk/types" + "github.com/lavanet/lava/v4/utils" +) + +const ( + DecPrecision int64 = 8 + InitialDataStaleness = 24 * time.Hour +) + +// ScoreStore is a decaying weighted average object that is used to collect +// providers performance metrics samples (see QoS excellence comment below). +// These are used to calculate the providers QoS excellence score, used +// by the provider optimizer when choosing providers to be paired with a consumer. +// +// ScoreStore holds a score's numerator and denominator, last update timestamp, and a +// configuration object. When a ScoreStore updates it uses a decay exponent to lower +// the weight of old average samples and a weight parameter to determine the influence +// of the new sample. +// +// Resolving the ScoreStore's num and denom means to divide the num by the denom to get +// the score. Keeping the score as a fracture helps calculating and updating weighted +// average calculations on the go. +type ScoreStore struct { + Name string + Num float64 // using float64 and not math/big for performance + Denom float64 + Time time.Time + Config Config +} + +// ScoreStorer defines the interface for all score stores +type ScoreStorer interface { + Update(sample float64, sampleTime time.Time) error + Resolve() (float64, error) + Validate() error + String() string + UpdateConfig(opts ...Option) error + + GetName() string + GetNum() float64 + GetDenom() float64 + GetLastUpdateTime() time.Time + GetConfig() Config +} + +// NewCustomScoreStore creates a new custom ScoreStorer based on the score type +func NewCustomScoreStore(scoreType string, num, denom float64, t time.Time, opts ...Option) (ScoreStorer, error) { + cfg := defaultConfig + for _, opt := range opts { + opt(&cfg) + } + + if err := cfg.Validate(); err != nil { + return nil, fmt.Errorf("cannot create %s ScoreStore, invalid configuration: %w", scoreType, err) + } + + base := &ScoreStore{ + Num: num, + Denom: denom, + Time: t, + Config: cfg, + } + + if err := base.Validate(); err != nil { + return nil, fmt.Errorf("cannot create %s ScoreStore, invalid parameters: %w", scoreType, err) + } + + switch scoreType { + case LatencyScoreType: + base.Name = LatencyScoreType + return &LatencyScoreStore{ScoreStore: base}, nil + case SyncScoreType: + base.Name = SyncScoreType + return &SyncScoreStore{ScoreStore: base}, nil + case AvailabilityScoreType: + base.Name = AvailabilityScoreType + return &AvailabilityScoreStore{ScoreStore: base}, nil + default: + return nil, fmt.Errorf("unknown score type: %s", scoreType) + } +} + +// NewScoreStore creates a new default ScoreStorer based on the score type +func NewScoreStore(scoreType string) ScoreStorer { + switch scoreType { + case LatencyScoreType: + // default latency: 10ms + latencyScoreStore, err := NewCustomScoreStore(scoreType, DefaultLatencyNum, 1, time.Now().Add(-InitialDataStaleness)) + if err != nil { + 
utils.LavaFormatFatal("cannot create default "+scoreType+" ScoreStore", err) + } + return latencyScoreStore + + case SyncScoreType: + // default sync: 100ms + syncScoreStore, err := NewCustomScoreStore(scoreType, DefaultSyncNum, 1, time.Now().Add(-InitialDataStaleness)) + if err != nil { + utils.LavaFormatFatal("cannot create default "+scoreType+" ScoreStore", err) + } + return syncScoreStore + + case AvailabilityScoreType: + // default availability: 1 + availabilityScoreStore, err := NewCustomScoreStore(scoreType, DefaultAvailabilityNum, 1, time.Now().Add(-InitialDataStaleness)) + if err != nil { + utils.LavaFormatFatal("cannot create default "+scoreType+" ScoreStore", err) + } + return availabilityScoreStore + default: + utils.LavaFormatFatal("cannot create default "+scoreType+" ScoreStore", fmt.Errorf("unknown score type: %s", scoreType)) + return nil // not reached + } +} + +// String prints a ScoreStore's fields +func (ss *ScoreStore) String() string { + return fmt.Sprintf("num: %f, denom: %f, last_update_time: %s, config: %s", + ss.Num, ss.Denom, ss.Time.String(), ss.Config.String()) +} + +// Validate validates the ScoreStore's fields hold valid values +func (ss *ScoreStore) Validate() error { + if ss.Num < 0 || ss.Denom <= 0 { + return fmt.Errorf("invalid %s ScoreStore: num or denom are non-positives, num: %f, denom: %f", ss.Name, ss.Num, ss.Denom) + } + + if err := ss.Config.Validate(); err != nil { + return errors.Wrap(err, "invalid "+ss.Name+" ScoreStore") + } + return nil +} + +// Resolve resolves the ScoreStore's frac by dividing the numerator by the denominator +func (ss *ScoreStore) Resolve() (float64, error) { + if err := ss.Validate(); err != nil { + return 0, errors.Wrap(err, "cannot calculate "+ss.Name+" ScoreStore's score") + } + return ss.Num / ss.Denom, nil +} + +// UpdateConfig updates the configuration of a ScoreStore +func (ss *ScoreStore) UpdateConfig(opts ...Option) error { + cfg := ss.Config + for _, opt := range opts { + opt(&cfg) + } + + if err := cfg.Validate(); err != nil { + return fmt.Errorf("invalid configuration: %w", err) + } + ss.Config = cfg + + return nil +} + +// update updates the ScoreStore's numerator and denominator with a new sample. +// The ScoreStore's score is calculated as a weighted average with a decay factor. 
+// The new sample is added by the following formula: +// +// num = num * decay_factor + sample * weight +// denom = denom * decay_factor + weight +// decay_factor = exp(-time_since_last_update / half_life_time) +func (ss *ScoreStore) Update(sample float64, sampleTime time.Time) error { + if ss == nil { + return fmt.Errorf("cannot update ScoreStore, ScoreStore is nil") + } + + if sample < 0 { + return fmt.Errorf("cannot update %s ScoreStore, sample is negative: %f", ss.Name, sample) + } + + if ss.Time.After(sampleTime) { + return fmt.Errorf("invalid %s ScoreStore: last update time in the future, last_update_time: %s, sample_time: %s", ss.Name, ss.Time.String(), sampleTime.String()) + } + + timeDiff := sampleTime.Sub(ss.Time).Seconds() + if timeDiff < 0 { + return fmt.Errorf("invalid time difference: %f seconds", timeDiff) + } + + exponent := -(math.Ln2 * timeDiff) / ss.Config.HalfLife.Seconds() + decayFactor := math.Exp(exponent) + if decayFactor > 1 { + return fmt.Errorf("invalid larger than 1 decay factor, factor: %f", decayFactor) + } + + newNum, err := ss.CalcNewNum(sample, decayFactor) + if err != nil { + return err + } + newDenom, err := ss.CalcNewDenom(decayFactor) + if err != nil { + return err + } + + ss.Num = newNum + ss.Denom = newDenom + ss.Time = sampleTime + + if err := ss.Validate(); err != nil { + return errors.Wrap(err, "cannot update "+ss.Name+" ScoreStore's num and denom") + } + + return nil +} + +// CalcNewNum calculates the new numerator update and verifies it's not negative or overflowing +func (ss *ScoreStore) CalcNewNum(sample float64, decayFactor float64) (float64, error) { + if math.IsInf(ss.Num*decayFactor, 0) || math.IsInf(sample*ss.Config.Weight, 0) { + return 0, utils.LavaFormatError("cannot ScoreStore update numerator", fmt.Errorf("potential overflow"), + utils.LogAttr("score_store_name", ss.Name), + utils.LogAttr("current_num", ss.Num), + utils.LogAttr("decay_factor", decayFactor), + utils.LogAttr("sample", sample), + utils.LogAttr("weight", ss.Config.Weight), + ) + } + + newNum := ss.Num*decayFactor + sample*ss.Config.Weight + if newNum < 0 { + return 0, fmt.Errorf("cannot update %s ScoreStore, invalid negative numerator: %f", ss.Name, newNum) + } + return newNum, nil +} + +// CalcNewDenom calculates the new denominator update and verifies it's strictly positive or not overflowing +func (ss *ScoreStore) CalcNewDenom(decayFactor float64) (float64, error) { + if math.IsInf(ss.Denom*decayFactor, 0) || math.IsInf(ss.Config.Weight, 0) { + return 0, utils.LavaFormatError("cannot ScoreStore update denominator", fmt.Errorf("potential overflow"), + utils.LogAttr("score_store_name", ss.Name), + utils.LogAttr("current_denom", ss.Denom), + utils.LogAttr("decay_factor", decayFactor), + utils.LogAttr("weight", ss.Config.Weight), + ) + } + + newDenom := ss.Denom*decayFactor + ss.Config.Weight + if newDenom <= 0 { + return 0, fmt.Errorf("cannot update %s ScoreStore, invalid non-positive denominator: %f", ss.Name, newDenom) + } + return newDenom, nil +} + +func (ss *ScoreStore) GetName() string { + return ss.Name +} + +func (ss *ScoreStore) GetNum() float64 { + return ss.Num +} + +func (ss *ScoreStore) GetDenom() float64 { + return ss.Denom +} + +func (ss *ScoreStore) GetLastUpdateTime() time.Time { + return ss.Time +} + +func (ss *ScoreStore) GetConfig() Config { + return ss.Config +} + +func ConvertToDec(val float64) sdk.Dec { + if val > 0 && val < math.Pow(10, -float64(DecPrecision)) { + // If value is positive but would round to zero, return smallest possible value + 
return sdk.NewDecWithPrec(1, DecPrecision) + } + intScore := int64(math.Round(val * math.Pow(10, float64(DecPrecision)))) + return sdk.NewDecWithPrec(intScore, DecPrecision) +} + +// QoS excellence is a collection of performance metrics that measure a provider's +// performance in terms of latency, sync, and availability. +// These are calculated when the consumer processes responses from the provider. +// The consumer measures the provider's response latency, its reported last seen block +// (to check for sync) and whether the provider is responsive in general (availability). +// All three metrics are saved using the ScoreStore objects that implement the ScoreStorer +// interface. +// The QoS excellence score influences a provider's chance to be selected in the consumer +// pairing process. +// The metrics are: +// 1. Latency: the time it takes the provider to answer to consumer relays. +// +// 2. Sync: the difference between the latest block as the provider percieves it +// compared to the actual last block of the chain it serves. +// +// 3. Availability: the provider's up time. + +const ( + DefaultLatencyNum float64 = 0.01 + DefaultSyncNum float64 = 0.1 + DefaultAvailabilityNum float64 = 1 + + LatencyScoreType = "latency" + SyncScoreType = "sync" + AvailabilityScoreType = "availability" + + // Worst score results for each QoS excellence metric for truncation + WorstLatencyScore float64 = 30 // seconds + WorstSyncScore float64 = 20 * 60 // seconds + WorstAvailabilityScore float64 = 0.00001 // very small value to avoid score = 0 +) + +/* ########## Latency ScoreStore ############ */ + +type LatencyScoreStore struct { + *ScoreStore +} + +// Update updates the Latency ScoreStore's numerator and denominator with a new sample. +func (ls *LatencyScoreStore) Update(sample float64, sampleTime time.Time) error { + if ls == nil { + return fmt.Errorf("LatencyScoreStore is nil") + } + + // normalize the sample with the latency CU factor + sample *= ls.ScoreStore.Config.LatencyCuFactor + + return ls.ScoreStore.Update(sample, sampleTime) +} + +/* ########## Sync ScoreStore ############ */ + +type SyncScoreStore struct { + *ScoreStore +} + +// Update updates the Sync ScoreStore's numerator and denominator with a new sample. +func (ss *SyncScoreStore) Update(sample float64, sampleTime time.Time) error { + if ss == nil { + return fmt.Errorf("SyncScoreStore is nil") + } + return ss.ScoreStore.Update(sample, sampleTime) +} + +/* ########## Availability ScoreStore ############ */ + +type AvailabilityScoreStore struct { + *ScoreStore +} + +// Update updates the availability ScoreStore's numerator and denominator with a new sample. +// The new sample must be 0 or 1. 
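A back-of-the-envelope check (not part of this diff) of the decay update implemented by ScoreStore.Update above, using the same numbers that TestScoreStoreUpdate exercises further down: weight 4, half-life 5ms, a sample of 1 arriving 10ms after the last update, starting from num=1 and denom=2.

```go
package main

import (
	"fmt"
	"math"
)

func main() {
	num, denom := 1.0, 2.0
	weight := 4.0
	halfLifeSec := 0.005 // 5ms half life
	sample := 1.0
	elapsedSec := 0.010 // the sample arrives two half-lives after the last update

	decay := math.Exp(-math.Ln2 * elapsedSec / halfLifeSec) // 2^-2 = 0.25
	newNum := num*decay + sample*weight                     // 0.25 + 4 = 4.25
	newDenom := denom*decay + weight                        // 0.5 + 4 = 4.5

	fmt.Println(decay, newNum, newDenom, newNum/newDenom)
}
```

For the latency store the sample is first scaled by the latency CU factor (0.5 in that test), so its numerator lands at 2.25 instead of 4.25.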
+func (as *AvailabilityScoreStore) Update(sample float64, sampleTime time.Time) error { + if as == nil { + return fmt.Errorf("AvailabilityScoreStore is nil") + } + if sample != float64(0) && sample != float64(1) { + return fmt.Errorf("availability must be 0 (false) or 1 (true), got %f", sample) + } + return as.ScoreStore.Update(sample, sampleTime) +} + +func (as *AvailabilityScoreStore) Resolve() (float64, error) { + if as == nil { + return 0, fmt.Errorf("AvailabilityScoreStore is nil") + } + score, err := as.ScoreStore.Resolve() + if err != nil { + return 0, err + } + + // if the resolved score is equal to zero, return a very small number + // instead of zero since in the QoS Compute() method we divide by + // the availability score + if score == 0 { + score = WorstAvailabilityScore + } + return score, nil +} diff --git a/utils/score/score_store_test.go b/utils/score/score_store_test.go new file mode 100644 index 0000000000..e08b764a96 --- /dev/null +++ b/utils/score/score_store_test.go @@ -0,0 +1,435 @@ +package score_test + +import ( + "math" + "math/rand" + "testing" + "time" + + "github.com/lavanet/lava/v4/utils/score" + "github.com/stretchr/testify/require" +) + +func TestScoreStoreCreation(t *testing.T) { + num, denom, timestamp := float64(1), float64(2), time.Now() + weight, halfLife, latencyCuFactor := float64(4), 5*time.Second, float64(1) + opts := []score.Option{score.WithWeight(weight), score.WithDecayHalfLife(halfLife)} + negativeWeightOpts := []score.Option{score.WithWeight(-weight), score.WithDecayHalfLife(halfLife), score.WithLatencyCuFactor(latencyCuFactor)} + negativeHalflifeOpts := []score.Option{score.WithWeight(weight), score.WithDecayHalfLife(-halfLife), score.WithLatencyCuFactor(latencyCuFactor)} + negativeLatencyCuFactorOpts := []score.Option{score.WithWeight(weight), score.WithDecayHalfLife(halfLife), score.WithLatencyCuFactor(-latencyCuFactor)} + + template := []struct { + name string + scoreType string + num float64 + denom float64 + timestamp time.Time + opts []score.Option + valid bool + }{ + {name: "valid", scoreType: score.LatencyScoreType, num: num, denom: denom, timestamp: timestamp, opts: nil, valid: true}, + {name: "valid latency store with opts", scoreType: score.LatencyScoreType, num: num, denom: denom, timestamp: timestamp, opts: opts, valid: true}, + {name: "valid sync store with opts", scoreType: score.SyncScoreType, num: num, denom: denom, timestamp: timestamp, opts: opts, valid: true}, + {name: "valid availability store with opts", scoreType: score.AvailabilityScoreType, num: num, denom: denom, timestamp: timestamp, opts: opts, valid: true}, + + {name: "invalid negative num", scoreType: score.LatencyScoreType, num: -num, denom: denom, timestamp: timestamp, opts: nil, valid: false}, + {name: "invalid negative denom", scoreType: score.LatencyScoreType, num: num, denom: -denom, timestamp: timestamp, opts: nil, valid: false}, + {name: "invalid zero denom", scoreType: score.LatencyScoreType, num: num, denom: 0, timestamp: timestamp, opts: nil, valid: false}, + {name: "invalid option - negative weight", scoreType: score.LatencyScoreType, num: num, denom: denom, timestamp: timestamp, opts: negativeWeightOpts, valid: false}, + {name: "invalid option - negative half life", scoreType: score.LatencyScoreType, num: num, denom: denom, timestamp: timestamp, opts: negativeHalflifeOpts, valid: false}, + {name: "invalid option - negative latency cu factor", scoreType: score.LatencyScoreType, num: num, denom: denom, timestamp: timestamp, opts: 
negativeLatencyCuFactorOpts, valid: false}, + } + + for _, tt := range template { + t.Run(tt.name, func(t *testing.T) { + store, err := score.NewCustomScoreStore(tt.scoreType, tt.num, tt.denom, tt.timestamp, tt.opts...) + if tt.valid { + require.NoError(t, err) + require.Equal(t, tt.scoreType, store.GetName()) + require.Equal(t, tt.num, store.GetNum()) + require.Equal(t, tt.denom, store.GetDenom()) + require.Equal(t, tt.timestamp, store.GetLastUpdateTime()) + if tt.opts != nil { + require.Equal(t, weight, store.GetConfig().Weight) + require.Equal(t, halfLife, store.GetConfig().HalfLife) + } + } else { + require.Error(t, err) + } + }) + } +} + +func TestDefaultScoreStoreCreation(t *testing.T) { + template := []struct { + name string + scoreType string + }{ + {name: "latency store", scoreType: score.LatencyScoreType}, + {name: "sync store", scoreType: score.SyncScoreType}, + {name: "availability store", scoreType: score.AvailabilityScoreType}, + } + + for _, tt := range template { + t.Run(tt.name, func(t *testing.T) { + store := score.NewScoreStore(tt.scoreType) + var expectedNum float64 + switch tt.scoreType { + case score.LatencyScoreType: + expectedNum = score.DefaultLatencyNum + case score.SyncScoreType: + expectedNum = score.DefaultSyncNum + case score.AvailabilityScoreType: + expectedNum = score.DefaultAvailabilityNum + } + + require.Equal(t, tt.scoreType, store.GetName()) + require.Equal(t, expectedNum, store.GetNum()) + require.Equal(t, float64(1), store.GetDenom()) + require.InEpsilon(t, time.Now().Add(-score.InitialDataStaleness).UTC().Unix(), store.GetLastUpdateTime().UTC().Unix(), 0.01) + require.Equal(t, score.DefaultWeight, store.GetConfig().Weight) + require.Equal(t, score.DefaultHalfLifeTime, store.GetConfig().HalfLife) + }) + } +} + +func TestScoreStoreValidation(t *testing.T) { + validConfig := score.Config{Weight: 1, HalfLife: time.Second, LatencyCuFactor: 1} + invalidConfig1 := score.Config{Weight: -1, HalfLife: time.Second, LatencyCuFactor: 1} + invalidConfig2 := score.Config{Weight: 1, HalfLife: time.Second, LatencyCuFactor: 1.01} + + template := []struct { + name string + store score.ScoreStore + valid bool + }{ + {name: "valid", store: score.ScoreStore{Name: "dummy", Num: 1, Denom: 1, Time: time.Now(), Config: validConfig}, valid: true}, + {name: "invalid negative num", store: score.ScoreStore{Name: "dummy", Num: -1, Denom: 1, Time: time.Now(), Config: validConfig}, valid: false}, + {name: "invalid negative denom", store: score.ScoreStore{Name: "dummy", Num: 1, Denom: -1, Time: time.Now(), Config: validConfig}, valid: false}, + {name: "invalid zero denom", store: score.ScoreStore{Name: "dummy", Num: 1, Denom: 0, Time: time.Now(), Config: validConfig}, valid: false}, + {name: "invalid config weight", store: score.ScoreStore{Name: "dummy", Num: 1, Denom: 1, Time: time.Now(), Config: invalidConfig1}, valid: false}, + {name: "invalid config latency cu factor", store: score.ScoreStore{Name: "dummy", Num: 1, Denom: 1, Time: time.Now(), Config: invalidConfig2}, valid: false}, + } + + for _, tt := range template { + t.Run(tt.name, func(t *testing.T) { + err := tt.store.Validate() + if tt.valid { + require.NoError(t, err) + } else { + require.Error(t, err) + } + }) + } +} + +func TestScoreStoreResolve(t *testing.T) { + validConfig := score.Config{Weight: 1, HalfLife: time.Second, LatencyCuFactor: 0.1} + template := []struct { + name string + store score.ScoreStore + result float64 + valid bool + }{ + {name: "valid", store: score.ScoreStore{Num: 5, Denom: 16, Config: 
validConfig}, result: 0.3125, valid: true}, + {name: "invalid num", store: score.ScoreStore{Num: -5, Denom: 16, Config: validConfig}, result: 0.3125, valid: false}, + } + + for _, tt := range template { + t.Run(tt.name, func(t *testing.T) { + res, err := tt.store.Resolve() + if tt.valid { + require.NoError(t, err) + require.Equal(t, tt.result, res) + } else { + require.Error(t, err) + } + }) + } +} + +func TestScoreStoreUpdateConfig(t *testing.T) { + store := score.NewScoreStore(score.LatencyScoreType) + weight, latencyCuFactor := float64(2), float64(1) + halfLife := 3 * time.Second + + validOpts := []score.Option{score.WithWeight(weight), score.WithDecayHalfLife(halfLife), score.WithLatencyCuFactor(latencyCuFactor)} + invalidOpts := []score.Option{score.WithWeight(-weight), score.WithDecayHalfLife(-halfLife), score.WithLatencyCuFactor(-latencyCuFactor)} + + err := store.UpdateConfig(validOpts...) + require.NoError(t, err) + require.Equal(t, weight, store.GetConfig().Weight) + require.Equal(t, halfLife, store.GetConfig().HalfLife) + require.Equal(t, latencyCuFactor, store.GetConfig().LatencyCuFactor) + + for _, opt := range invalidOpts { + err = store.UpdateConfig(opt) + require.Error(t, err) + require.Equal(t, weight, store.GetConfig().Weight) + require.Equal(t, halfLife, store.GetConfig().HalfLife) + require.Equal(t, latencyCuFactor, store.GetConfig().LatencyCuFactor) + } +} + +func TestScoreStoreUpdate(t *testing.T) { + num, denom, timestamp := float64(1), float64(2), time.Date(0, 0, 0, 0, 0, 1, 0, time.UTC) + weight, halfLife, latencyCuFactor := float64(4), 5*time.Millisecond, 0.5 + sample, sampleTime := float64(1), timestamp.Add(10*time.Millisecond) + + // in this test, we add a sample after 10 milliseconds, so the exponent is: + // time_since_last_update/half_life_time = 10ms / 5ms = 2 + expectedNum := num*math.Exp(-2*math.Ln2) + weight*sample + expectedLatencyNum := math.Exp(-2*math.Ln2) + weight*sample*latencyCuFactor + expectedDenom := denom*math.Exp(-2*math.Ln2) + weight + + template := []struct { + name string + scoreType string + sample float64 + valid bool + }{ + {name: "valid latency", scoreType: score.LatencyScoreType, sample: sample, valid: true}, + {name: "valid sync", scoreType: score.SyncScoreType, sample: sample, valid: true}, + {name: "valid availability", scoreType: score.AvailabilityScoreType, sample: sample, valid: true}, + + {name: "invalid negative latency sample", scoreType: score.LatencyScoreType, sample: -sample, valid: false}, + {name: "invalid negative sync sample", scoreType: score.SyncScoreType, sample: -sample, valid: false}, + {name: "invalid negative availability sample", scoreType: score.AvailabilityScoreType, sample: -sample, valid: false}, + {name: "invalid availability sample - not 0/1", scoreType: score.AvailabilityScoreType, sample: 0.5, valid: false}, + } + + for _, tt := range template { + t.Run(tt.name, func(t *testing.T) { + store, err := score.NewCustomScoreStore(tt.scoreType, num, denom, timestamp, + score.WithWeight(weight), score.WithDecayHalfLife(halfLife), score.WithLatencyCuFactor(latencyCuFactor)) + require.NoError(t, err) + + err = store.Update(tt.sample, sampleTime) + if tt.valid { + if tt.scoreType == score.LatencyScoreType { + require.Equal(t, expectedLatencyNum, store.GetNum()) + } else { + require.Equal(t, expectedNum, store.GetNum()) + } + require.Equal(t, expectedDenom, store.GetDenom()) + require.Equal(t, sampleTime, store.GetLastUpdateTime()) + } else { + require.Error(t, err) + } + }) + } +} + +// 
TestScoreStoreUpdateIdenticalSamples verifies that updating the score with +// many identical samples should keep the score value. In other words, the +// ScoreStore's num and denom will change, but resolving the fracture +// should have the same results as always +func TestScoreStoreUpdateIdenticalSamples(t *testing.T) { + num, denom, timestamp := float64(94), float64(17), time.Date(0, 0, 0, 0, 0, 1, 0, time.UTC) + weight, halfLife := float64(4), 500*time.Millisecond + + store, err := score.NewCustomScoreStore(score.LatencyScoreType, num, denom, timestamp, + score.WithWeight(weight), score.WithDecayHalfLife(halfLife)) + require.NoError(t, err) + + // update the ScoreStore with many identical samples + iterations := 50 + sampleTime := timestamp + sample := float64(20) + for i := 0; i < iterations; i++ { + sampleTime = sampleTime.Add(time.Duration(rand.Int63n(500)) * time.Millisecond) + err = store.Update(sample, sampleTime) + require.NoError(t, err) + } + + // with many identical samples, the expected score should be the sample value + expected := sample + score, err := store.Resolve() + require.NoError(t, err) + require.InEpsilon(t, expected, score, 0.000001) +} + +// TestScoreStoreUpdateIdenticalSamplesThenBetter verifies that updating the score with +// many identical samples and then better identical samples, the score value should be +// as the better sample value +func TestScoreStoreUpdateIdenticalSamplesThenBetter(t *testing.T) { + num, denom, timestamp := float64(94), float64(17), time.Date(0, 0, 0, 0, 0, 1, 0, time.UTC) + weight, halfLife := float64(4), 500*time.Millisecond + + store, err := score.NewCustomScoreStore(score.LatencyScoreType, num, denom, timestamp, + score.WithWeight(weight), score.WithDecayHalfLife(halfLife)) + require.NoError(t, err) + + // update the ScoreStore with many identical samples + iterations := 50 + sampleTime := timestamp + sample := float64(20) + for i := 0; i < iterations; i++ { + sampleTime = sampleTime.Add(time.Duration(rand.Int63n(500)) * time.Millisecond) + err = store.Update(sample, sampleTime) + require.NoError(t, err) + } + + // with many identical samples, the expected score should be the sample value + expected := sample + score, err := store.Resolve() + require.NoError(t, err) + require.InEpsilon(t, expected, score, 0.000001) + + // update the ScoreStore with many better identical samples + betterSample := float64(3) + for i := 0; i < iterations; i++ { + sampleTime = sampleTime.Add(time.Duration(rand.Int63n(500)) * time.Millisecond) + err = store.Update(betterSample, sampleTime) + require.NoError(t, err) + } + + // the expected score should be the better sample value + expected = betterSample + score, err = store.Resolve() + require.NoError(t, err) + require.InEpsilon(t, expected, score, 0.000001) +} + +// TestScoreStoreUpdateDecayFactors checks that updating a ScoreStore after a +// short/long time has a different influence on the ScoreStore. 
Since updating +// involves multiplying the old score value with a decay factor, adding a new +// sample after a long time should change the score more drastically +func TestScoreStoreUpdateDecayFactors(t *testing.T) { + num, denom, timestamp := float64(100), float64(20), time.Date(0, 0, 0, 0, 0, 1, 0, time.UTC) + weight, halfLife := float64(4), 500*time.Millisecond + originalScore := num / denom + + // setup two identical stores + store1, err := score.NewCustomScoreStore(score.LatencyScoreType, num, denom, timestamp, + score.WithWeight(weight), score.WithDecayHalfLife(halfLife)) + require.NoError(t, err) + store2, err := score.NewCustomScoreStore(score.LatencyScoreType, num, denom, timestamp, + score.WithWeight(weight), score.WithDecayHalfLife(halfLife)) + require.NoError(t, err) + + // update first store with a sample after a short time, and the other + // with a sample after a long time + err = store1.Update(1, timestamp.Add(10*time.Millisecond)) + require.NoError(t, err) + err = store2.Update(1, timestamp.Add(500*time.Millisecond)) + require.NoError(t, err) + + // get the difference of each store's score from the original score + // store 2 should have a larger difference + score1, err := store1.Resolve() + require.NoError(t, err) + score2, err := store2.Resolve() + require.NoError(t, err) + require.Greater(t, math.Abs(score2-originalScore), math.Abs(score1-originalScore)) +} + +// TestScoreStoreStaysWithinRange tests that if all the samples +// are in range [x, y], then the resolved score is also between +// [x, y]. It should work for every decay factor and weights. +func TestScoreStoreStaysWithinRange(t *testing.T) { + timestamp, halfLife := time.Date(0, 0, 0, 0, 0, 1, 0, time.UTC), 500*time.Millisecond + minRangeValue, maxRangeValue := float64(0), float64(100) + + store, err := score.NewCustomScoreStore(score.LatencyScoreType, 1, 1, timestamp, + score.WithWeight(1), score.WithDecayHalfLife(halfLife)) + require.NoError(t, err) + + // update the ScoreStore with samples within the range with different weights and + // decay factors + iterations := 1000 + sampleTime := timestamp + for i := 0; i < iterations; i++ { + sampleTime = sampleTime.Add(time.Duration(rand.Int63n(500)) * time.Millisecond) + store.UpdateConfig(score.WithWeight(float64(rand.Int63n(int64(maxRangeValue))))) + err = store.Update(float64(rand.Int63n(int64(maxRangeValue))), sampleTime) + require.NoError(t, err) + } + + // the expected score should be within the defined range + score, err := store.Resolve() + require.NoError(t, err) + require.LessOrEqual(t, score, maxRangeValue) + require.GreaterOrEqual(t, score, minRangeValue) +} + +// TestScoreStoreHalfLife tests the update of ScoreStore for different +// half life factors. Assuming two identical stores, each with different +// half life factor, we update them in the same time. 
The store with the lower +// half life factor will be influenced more than the one with the higher half +// life factor +func TestScoreStoreHalfLife(t *testing.T) { + num, denom, timestamp := float64(100), float64(20), time.Date(0, 0, 0, 0, 0, 1, 0, time.UTC) + weight := float64(4) + originalScore := num / denom + shortHalfLife, longHalfLife := 10*time.Millisecond, 500*time.Millisecond + + // setup two identical stores (store1 = short, store2 = long) + store1, err := score.NewCustomScoreStore(score.LatencyScoreType, num, denom, timestamp, + score.WithWeight(weight), score.WithDecayHalfLife(shortHalfLife)) + require.NoError(t, err) + store2, err := score.NewCustomScoreStore(score.LatencyScoreType, num, denom, timestamp, + score.WithWeight(weight), score.WithDecayHalfLife(longHalfLife)) + require.NoError(t, err) + + // update the stores with the same sample and sample time + err = store1.Update(1, timestamp.Add(100*time.Millisecond)) + require.NoError(t, err) + err = store2.Update(1, timestamp.Add(100*time.Millisecond)) + require.NoError(t, err) + + // get the difference of each store's score from the original score + // store 1 should have a larger difference (since it had the short + // half life factor) + score1, err := store1.Resolve() + require.NoError(t, err) + score2, err := store2.Resolve() + require.NoError(t, err) + require.Greater(t, math.Abs(score1-originalScore), math.Abs(score2-originalScore)) +} + +// TestScoreStoreWeight tests the update of ScoreStore for different +// weights. Assuming two identical stores, each with a different weight, +// we update them in the same time. The store with the higher weight +// will be influenced more than the other one +func TestScoreStoreWeight(t *testing.T) { + num, denom, timestamp := float64(100), float64(20), time.Date(0, 0, 0, 0, 0, 1, 0, time.UTC) + halfLife := 500 * time.Millisecond + originalScore := num / denom + weight1, weight2 := float64(4), float64(40) + + // setup two identical stores (store1 = low weight, store2 = high weight) + store1, err := score.NewCustomScoreStore(score.LatencyScoreType, num, denom, timestamp, + score.WithWeight(weight1), score.WithDecayHalfLife(halfLife)) + require.NoError(t, err) + store2, err := score.NewCustomScoreStore(score.LatencyScoreType, num, denom, timestamp, + score.WithWeight(weight2), score.WithDecayHalfLife(halfLife)) + require.NoError(t, err) + + // update the stores with the same sample and sample time + err = store1.Update(1, timestamp.Add(100*time.Millisecond)) + require.NoError(t, err) + err = store2.Update(1, timestamp.Add(100*time.Millisecond)) + require.NoError(t, err) + + // get the difference of each store's score from the original score + // store 2 should have a larger difference (since it had the short + // half life factor) + score1, err := store1.Resolve() + require.NoError(t, err) + score2, err := store2.Resolve() + require.NoError(t, err) + require.Greater(t, math.Abs(score2-originalScore), math.Abs(score1-originalScore)) +} + +// TestScoreStoreAvailabilityResolveNonZero verifies that the Resolve() +// method of the AvailabilityScoreStore doesn't return zero when num/denom = 0 +// Zero is undesirable since in QoS Compute() method we divide by the +// availability score +func TestScoreStoreAvailabilityResolveNonZero(t *testing.T) { + store, err := score.NewCustomScoreStore(score.AvailabilityScoreType, 0, 1, time.Now()) + require.NoError(t, err) + score, err := store.Resolve() + require.NoError(t, err) + require.NotZero(t, score) +} diff --git 
a/x/pairing/keeper/msg_server_relay_payment.go b/x/pairing/keeper/msg_server_relay_payment.go index 9eb2017373..df4fb3bd14 100644 --- a/x/pairing/keeper/msg_server_relay_payment.go +++ b/x/pairing/keeper/msg_server_relay_payment.go @@ -501,7 +501,7 @@ func (k Keeper) aggregateReputationEpochQosScore(ctx sdk.Context, subscription s } syncFactor := k.ReputationLatencyOverSyncFactor(ctx) - score, err := relay.QosExcellenceReport.ComputeQosExcellenceForReputation(syncFactor) + score, err := relay.QosExcellenceReport.ComputeQoSExcellence(types.WithSyncFactor(syncFactor)) if err != nil { return utils.LavaFormatWarning("RelayPayment: could not compute qos excellence score", err, utils.LogAttr("consumer", subscription), diff --git a/x/pairing/types/QualityOfServiceReport.go b/x/pairing/types/QualityOfServiceReport.go deleted file mode 100644 index 1107688549..0000000000 --- a/x/pairing/types/QualityOfServiceReport.go +++ /dev/null @@ -1,72 +0,0 @@ -package types - -import ( - "fmt" - - "cosmossdk.io/math" - sdk "github.com/cosmos/cosmos-sdk/types" - "github.com/lavanet/lava/v4/utils" -) - -func (qos *QualityOfServiceReport) ComputeQoS() (sdk.Dec, error) { - if qos.Availability.GT(sdk.OneDec()) || qos.Availability.LT(sdk.ZeroDec()) || - qos.Latency.GT(sdk.OneDec()) || qos.Latency.LT(sdk.ZeroDec()) || - qos.Sync.GT(sdk.OneDec()) || qos.Sync.LT(sdk.ZeroDec()) { - return sdk.ZeroDec(), fmt.Errorf("QoS scores is not between 0-1") - } - - return qos.Availability.Mul(qos.Sync).Mul(qos.Latency).ApproxRoot(3) -} - -func (qos *QualityOfServiceReport) ComputeQoSExcellence() (sdk.Dec, error) { - if qos.Availability.LTE(sdk.ZeroDec()) || - qos.Latency.LTE(sdk.ZeroDec()) || - qos.Sync.LTE(sdk.ZeroDec()) { - return sdk.ZeroDec(), fmt.Errorf("QoS excellence scores is below 0") - } - return qos.Availability.Quo(qos.Sync).Quo(qos.Latency).ApproxRoot(3) -} - -// ComputeQosExcellenceForReputation computes the score from the QoS excellence report to update the provider's reputation -// report score = latency + sync*syncFactor + ((1/availability) - 1) * FailureCost (note: larger value is worse) -func (qos QualityOfServiceReport) ComputeQosExcellenceForReputation(syncFactor math.LegacyDec) (math.LegacyDec, error) { - if qos.Availability.LT(sdk.ZeroDec()) || - qos.Latency.LT(sdk.ZeroDec()) || - qos.Sync.LT(sdk.ZeroDec()) || syncFactor.LT(sdk.ZeroDec()) { - return sdk.ZeroDec(), utils.LavaFormatWarning("ComputeQosExcellenceForReputation: compute failed", fmt.Errorf("QoS excellence scores is below 0"), - utils.LogAttr("availability", qos.Availability.String()), - utils.LogAttr("sync", qos.Sync.String()), - utils.LogAttr("latency", qos.Latency.String()), - ) - } - - latency := qos.Latency - sync := qos.Sync.Mul(syncFactor) - availability := math.LegacyNewDec(FailureCost) - if !qos.Availability.IsZero() { - availability = availability.Mul((math.LegacyOneDec().Quo(qos.Availability)).Sub(math.LegacyOneDec())) - } else { - availability = math.LegacyMaxSortableDec.QuoInt64(2) // on qs.Availability = 0 we take the largest score possible - } - return latency.Add(sync).Add(availability), nil -} - -// ValidateAndFixQoSExcellence is a temporary function to validate the QoS excellence report -// TODO: remove after the optimizer refactor is merged -func (qos *QualityOfServiceReport) ValidateAndFixQoSExcellence() error { - if qos == nil { - return fmt.Errorf("QoS excellence report is nil") - } - - if qos.Availability.LT(sdk.ZeroDec()) { - qos.Availability = sdk.ZeroDec() - } - if qos.Latency.LT(sdk.ZeroDec()) { - qos.Latency = 
sdk.ZeroDec() - } - if qos.Sync.LT(sdk.ZeroDec()) { - qos.Sync = sdk.ZeroDec() - } - - return nil -} diff --git a/x/pairing/types/QualityOfServiceReport_test.go b/x/pairing/types/QualityOfServiceReport_test.go deleted file mode 100644 index 9ed38e134d..0000000000 --- a/x/pairing/types/QualityOfServiceReport_test.go +++ /dev/null @@ -1,100 +0,0 @@ -package types - -import ( - "testing" - - "cosmossdk.io/math" - sdk "github.com/cosmos/cosmos-sdk/types" - "github.com/stretchr/testify/require" -) - -func createTestQosReportScores(forReputation bool) ([]math.LegacyDec, error) { - qos1 := &QualityOfServiceReport{ - Latency: sdk.MustNewDecFromStr("1.5"), - Availability: sdk.MustNewDecFromStr("1"), - Sync: sdk.MustNewDecFromStr("0.1"), - } - qos2 := &QualityOfServiceReport{ - Latency: sdk.MustNewDecFromStr("0.2"), - Availability: sdk.MustNewDecFromStr("1"), - Sync: sdk.MustNewDecFromStr("0.1"), - } - qos3 := &QualityOfServiceReport{ - Latency: sdk.MustNewDecFromStr("0.1"), - Availability: sdk.MustNewDecFromStr("1"), - Sync: sdk.MustNewDecFromStr("0.5"), - } - qos4 := &QualityOfServiceReport{ - Latency: sdk.MustNewDecFromStr("0.1"), - Availability: sdk.MustNewDecFromStr("0.5"), - Sync: sdk.MustNewDecFromStr("0.5"), - } - - res := []math.LegacyDec{} - if forReputation { - syncFactor := sdk.MustNewDecFromStr("0.5") - qos1Res, errQos1 := qos1.ComputeQosExcellenceForReputation(syncFactor) - if errQos1 != nil { - return nil, errQos1 - } - qos2Res, errQos2 := qos2.ComputeQosExcellenceForReputation(syncFactor) - if errQos2 != nil { - return nil, errQos2 - } - qos3Res, errQos3 := qos3.ComputeQosExcellenceForReputation(syncFactor) - if errQos3 != nil { - return nil, errQos3 - } - qos4Res, errQos4 := qos4.ComputeQosExcellenceForReputation(syncFactor) - if errQos4 != nil { - return nil, errQos4 - } - res = append(res, qos1Res, qos2Res, qos3Res, qos4Res) - } else { - qos1Res, errQos1 := qos1.ComputeQoSExcellence() - if errQos1 != nil { - return nil, errQos1 - } - qos2Res, errQos2 := qos2.ComputeQoSExcellence() - if errQos2 != nil { - return nil, errQos2 - } - qos3Res, errQos3 := qos3.ComputeQoSExcellence() - if errQos3 != nil { - return nil, errQos3 - } - qos4Res, errQos4 := qos4.ComputeQoSExcellence() - if errQos4 != nil { - return nil, errQos4 - } - res = append(res, qos1Res, qos2Res, qos3Res, qos4Res) - } - - return res, nil -} - -func TestQosReport(t *testing.T) { - res, err := createTestQosReportScores(false) - require.NoError(t, err) - require.True(t, res[0].LT(res[1])) - require.True(t, res[0].LT(res[2])) - require.True(t, res[0].LT(res[3])) - - require.True(t, res[1].GT(res[2])) - require.True(t, res[1].GT(res[3])) - - require.True(t, res[3].LT(res[2])) -} - -func TestQosReportForReputation(t *testing.T) { - res, err := createTestQosReportScores(true) - require.NoError(t, err) - require.True(t, res[0].GT(res[1])) - require.True(t, res[0].GT(res[2])) - require.True(t, res[0].LT(res[3])) - - require.True(t, res[1].LT(res[2])) - require.True(t, res[1].LT(res[3])) - - require.True(t, res[3].GT(res[2])) -} diff --git a/x/pairing/types/qos_report.go b/x/pairing/types/qos_report.go new file mode 100644 index 0000000000..01367e71ec --- /dev/null +++ b/x/pairing/types/qos_report.go @@ -0,0 +1,195 @@ +package types + +import ( + "fmt" + + sdk "github.com/cosmos/cosmos-sdk/types" + "github.com/lavanet/lava/v4/utils" + "github.com/lavanet/lava/v4/utils/score" +) + +// QoS (quality of service) is a report that consists three metrics that are +// used to measure providers performance. The metrics are: +// 1. 
Latency: the time it takes the provider to answer to consumer relays. +// +// 2. Sync: the latest block that the provider percieves is close to the actual +// last block of the chain. +// +// 3. Availability: the provider's up time. + +var ( + DefaultFailureCost int64 = 3 + DefaultSyncFactor = sdk.NewDecWithPrec(3, 1) // 0.3 + DefaultStrategyFactor = BalancedStrategyFactor + DefaultBlockErrorProbability = sdk.NewDec(-1) // default: BlockErrorProbability should not be used + + // strategy factors (used as multipliers to the sync factor) + // 1. balanced strategy: multiply the sync factor by 1 -> staying with default sync factor + // 2. latency strategy: make latency more influential -> divide the default sync factor by 3 + // 3. sync freshness strategy: make sync more influential -> multiply the default sync factor by 3 + BalancedStrategyFactor = sdk.OneDec() // 1 + LatencyStrategyFactor = sdk.OneDec().QuoInt64(3) // 1/3 + SyncFreshnessStrategyFactor = sdk.NewDec(30) // 3 +) + +// Config defines a collection of parameters that can be used when calculating +// a QoS excellence report score +type Config struct { + SyncFactor sdk.Dec // a fractional factor to diminish the sync score influence compared to the latency score + FailureCost int64 // the cost (in seconds) for a provider failing to service a relay + StrategyFactor sdk.Dec // a factor to further configure the sync factor + BlockErrorProbability sdk.Dec // a probability that a provider doesn't have the requested block the optimizer needs (used for non-latest QoS scores) +} + +// Validate validates the Config's fields hold valid values +func (c Config) Validate() error { + if c.SyncFactor.IsNegative() || c.SyncFactor.GT(sdk.OneDec()) { + return fmt.Errorf("invalid config: sync factor must be between 0-1, sync factor: %s", c.SyncFactor.String()) + } + if c.FailureCost < 0 { + return fmt.Errorf("invalid config: failure cost cannot be negative, failure cost: %d", c.FailureCost) + } + if c.StrategyFactor.IsNegative() { + return fmt.Errorf("invalid config: strategy factor cannot be negative, failure cost: %s", c.StrategyFactor.String()) + } + if !c.BlockErrorProbability.Equal(DefaultBlockErrorProbability) && (c.BlockErrorProbability.IsNegative() || c.BlockErrorProbability.GT(sdk.OneDec())) { + return fmt.Errorf("invalid config: block error probability must be default unused (-1) or between 0-1, probability: %s", c.BlockErrorProbability.String()) + } + + return nil +} + +// String prints a Config's fields +func (c Config) String() string { + return fmt.Sprintf("sync factor: %s, failure cost sec: %d, strategy factor: %s, block error probability: %s", + c.SyncFactor.String(), c.FailureCost, c.StrategyFactor.String(), c.BlockErrorProbability.String()) +} + +// Default configuration +var DefaultConfig = Config{ + SyncFactor: DefaultSyncFactor, + FailureCost: DefaultFailureCost, + StrategyFactor: DefaultStrategyFactor, + BlockErrorProbability: DefaultBlockErrorProbability, +} + +// Option is used as a generic and elegant way to configure a new ScoreStore +type Option func(*Config) + +func WithSyncFactor(factor sdk.Dec) Option { + return func(c *Config) { + c.SyncFactor = factor + } +} + +func WithFailureCost(cost int64) Option { + return func(c *Config) { + c.FailureCost = cost + } +} + +func WithStrategyFactor(factor sdk.Dec) Option { + return func(c *Config) { + c.StrategyFactor = factor + } +} + +func WithBlockErrorProbability(probability sdk.Dec) Option { + return func(c *Config) { + c.BlockErrorProbability = probability + } +} + +// 
+// ComputeQoSExcellence calculates a score from the QoS excellence report using the following formula:
+// If the requested block is the latest block or "not applicable" (called from the node's code):
+//
+// score = latency + sync*syncFactor + ((1/availability) - 1) * FailureCost
+//
+// note, the syncFactor is multiplied by the strategy factor
+//
+// for every other request:
+//
+// score = latency + blockErrorProbability * FailureCost + ((1/availability) - 1) * FailureCost
+//
+// Important: when using this function from the node's code, do not configure the block error probability
+// (in default mode, it's unused)
+// TODO: after the reputation feature is merged, use this method to calculate the QoS excellence score
+func (qos *QualityOfServiceReport) ComputeQoSExcellence(opts ...Option) (sdk.Dec, error) {
+ if err := qos.Validate(); err != nil {
+ return sdk.ZeroDec(), err
+ }
+
+ cfg := DefaultConfig
+ for _, opt := range opts {
+ opt(&cfg)
+ }
+ if err := cfg.Validate(); err != nil {
+ return sdk.ZeroDec(), err
+ }
+
+ latency := qos.Latency
+ sync := qos.Sync.Mul(cfg.SyncFactor).Mul(cfg.StrategyFactor)
+ if !cfg.BlockErrorProbability.Equal(DefaultBlockErrorProbability) {
+ // BlockErrorProbability is not default, calculate sync using it (already validated above in cfg.Validate())
+ sync = cfg.BlockErrorProbability.MulInt64(cfg.FailureCost)
+ }
+ availability := ((sdk.OneDec().Quo(qos.Availability)).Sub(sdk.OneDec())).MulInt64(cfg.FailureCost)
+
+ return latency.Add(sync).Add(availability), nil
+}
+
+func (qos *QualityOfServiceReport) ComputeQoSExcellenceFloat64(opts ...Option) (float64, error) {
+ scoreDec, err := qos.ComputeQoSExcellence(opts...)
+ if err != nil {
+ return 0, err
+ }
+ score, err := scoreDec.Float64()
+ if err != nil {
+ return 0, err
+ }
+ return score, nil
+}
+
+func (qos *QualityOfServiceReport) Validate() error {
+ if qos.Latency.IsNegative() {
+ return fmt.Errorf("invalid QoS latency, latency is negative: %s", qos.Latency.String())
+ }
+ if qos.Sync.IsNegative() {
+ return fmt.Errorf("invalid QoS sync, sync is negative: %s", qos.Sync.String())
+ }
+ if qos.Availability.IsNegative() || qos.Availability.IsZero() {
+ return fmt.Errorf("invalid QoS availability, availability is non-positive: %s", qos.Availability.String())
+ }
+
+ return nil
+}
+
+func (qos *QualityOfServiceReport) ComputeQoS() (sdk.Dec, error) {
+ if qos.Availability.GT(sdk.OneDec()) || qos.Availability.LT(sdk.ZeroDec()) ||
+ qos.Latency.GT(sdk.OneDec()) || qos.Latency.LT(sdk.ZeroDec()) ||
+ qos.Sync.GT(sdk.OneDec()) || qos.Sync.LT(sdk.ZeroDec()) {
+ return sdk.ZeroDec(), fmt.Errorf("QoS scores are not between 0-1")
+ }
+
+ return qos.Availability.Mul(qos.Sync).Mul(qos.Latency).ApproxRoot(3)
+}
+
+func (qos *QualityOfServiceReport) GetScoresFloat64() (float64, float64, float64) {
+ latency, err := qos.Latency.Float64()
+ if err != nil {
+ utils.LavaFormatError("critical: failed to convert latency score to float64", err, utils.LogAttr("latency", qos.Latency.String()))
+ latency = score.WorstLatencyScore
+ }
+ sync, err := qos.Sync.Float64()
+ if err != nil {
+ utils.LavaFormatError("critical: failed to convert sync score to float64", err, utils.LogAttr("sync", qos.Sync.String()))
+ sync = score.WorstSyncScore
+ }
+ availability, err := qos.Availability.Float64()
+ if err != nil {
+ utils.LavaFormatError("critical: failed to convert availability score to float64", err, utils.LogAttr("availability", qos.Availability.String()))
+ availability = score.WorstAvailabilityScore
+ }
+
+ return latency, sync, availability
+}
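For context, the sketch below shows how a caller could combine the option setters above with ComputeQoSExcellence. It is an illustrative example only, not part of this change set: the report values and the main package are invented, and the expected numbers simply follow the formula documented above with the default config (sync factor 0.3, strategy factor 1, failure cost 3).

package main

import (
	"fmt"

	sdk "github.com/cosmos/cosmos-sdk/types"
	pairingtypes "github.com/lavanet/lava/v4/x/pairing/types"
)

func main() {
	// hypothetical report: 250ms latency, small sync lag, 80% availability
	report := pairingtypes.QualityOfServiceReport{
		Latency:      sdk.MustNewDecFromStr("0.25"),
		Sync:         sdk.MustNewDecFromStr("0.1"),
		Availability: sdk.MustNewDecFromStr("0.8"),
	}

	// default config: 0.25 + 0.1*0.3 + ((1/0.8)-1)*3 = 0.25 + 0.03 + 0.75 = 1.03
	balanced, err := report.ComputeQoSExcellence()
	if err != nil {
		panic(err)
	}
	fmt.Println("balanced score:", balanced.String())

	// latency strategy: the effective sync factor becomes 0.3 * (1/3) = 0.1,
	// so the sync term shrinks to 0.1*0.1 = 0.01 and the total to 1.01
	latencyStrategy, err := report.ComputeQoSExcellence(
		pairingtypes.WithStrategyFactor(pairingtypes.LatencyStrategyFactor),
	)
	if err != nil {
		panic(err)
	}
	fmt.Println("latency-strategy score:", latencyStrategy.String())
}

Note that only the sync term changes between the two calls, which is exactly the lever the strategy factors are meant to provide.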
diff --git a/x/pairing/types/qos_report_test.go b/x/pairing/types/qos_report_test.go
new file mode 100644
index 0000000000..6f7eaaa0f1
--- /dev/null
+++ b/x/pairing/types/qos_report_test.go
@@ -0,0 +1,226 @@
+package types_test
+
+import (
+ "testing"
+
+ sdk "github.com/cosmos/cosmos-sdk/types"
+ "github.com/lavanet/lava/v4/x/pairing/types"
+ "github.com/stretchr/testify/require"
+)
+
+func TestQosConfigValidation(t *testing.T) {
+ template := []struct {
+ name string
+ config types.Config
+ valid bool
+ }{
+ {name: "valid", config: types.DefaultConfig, valid: true},
+ {name: "valid - default block error probability (-1)", config: types.Config{SyncFactor: sdk.OneDec(), FailureCost: 3, StrategyFactor: sdk.OneDec(), BlockErrorProbability: types.DefaultBlockErrorProbability}, valid: true},
+
+ {name: "invalid negative sync factor", config: types.Config{SyncFactor: sdk.NewDec(-1), FailureCost: 3, StrategyFactor: sdk.OneDec(), BlockErrorProbability: sdk.OneDec()}, valid: false},
+ {name: "invalid greater than one sync factor", config: types.Config{SyncFactor: sdk.NewDec(2), FailureCost: 3, StrategyFactor: sdk.OneDec(), BlockErrorProbability: sdk.OneDec()}, valid: false},
+ {name: "invalid negative failure cost", config: types.Config{SyncFactor: sdk.OneDec(), FailureCost: -3, StrategyFactor: sdk.OneDec(), BlockErrorProbability: sdk.OneDec()}, valid: false},
+ {name: "invalid negative strategy factor", config: types.Config{SyncFactor: sdk.OneDec(), FailureCost: 3, StrategyFactor: sdk.NewDec(-1), BlockErrorProbability: sdk.OneDec()}, valid: false},
+ {name: "invalid negative block error probability (excluding default)", config: types.Config{SyncFactor: sdk.OneDec(), FailureCost: 3, StrategyFactor: sdk.OneDec(), BlockErrorProbability: sdk.NewDec(-2)}, valid: false},
+ {name: "invalid greater than 1 block error probability", config: types.Config{SyncFactor: sdk.OneDec(), FailureCost: 3, StrategyFactor: sdk.OneDec(), BlockErrorProbability: sdk.NewDec(2)}, valid: false},
+ }
+
+ for _, tt := range template {
+ t.Run(tt.name, func(t *testing.T) {
+ err := tt.config.Validate()
+ if tt.valid {
+ require.NoError(t, err)
+ } else {
+ require.Error(t, err)
+ }
+ })
+ }
+}
+
+func TestQosConfigModification(t *testing.T) {
+ config := types.Config{SyncFactor: sdk.OneDec(), FailureCost: 3, StrategyFactor: sdk.OneDec(), BlockErrorProbability: types.DefaultBlockErrorProbability}
+ syncFactor := sdk.NewDec(2)
+ failureCost := int64(3)
+ strategyFactor := sdk.NewDec(2)
+ blockErrorProbability := sdk.OneDec()
+
+ opts := []types.Option{
+ types.WithSyncFactor(syncFactor),
+ types.WithFailureCost(failureCost),
+ types.WithStrategyFactor(strategyFactor),
+ types.WithBlockErrorProbability(blockErrorProbability),
+ }
+ for _, opt := range opts {
+ opt(&config)
+ }
+
+ require.True(t, syncFactor.Equal(config.SyncFactor))
+ require.Equal(t, failureCost, config.FailureCost)
+ require.True(t, strategyFactor.Equal(config.StrategyFactor))
+ require.True(t, blockErrorProbability.Equal(config.BlockErrorProbability))
+}
+
"invalid negative sync", qos: types.QualityOfServiceReport{Latency: latency, Sync: sync.Neg(), Availability: availability}, valid: false}, + {name: "invalid negative availability", qos: types.QualityOfServiceReport{Latency: latency, Sync: sync, Availability: availability.Neg()}, valid: false}, + {name: "invalid zero availability", qos: types.QualityOfServiceReport{Latency: latency, Sync: sync, Availability: sdk.ZeroDec()}, valid: false}, + } + + for _, tt := range template { + t.Run(tt.name, func(t *testing.T) { + err := tt.qos.Validate() + if tt.valid { + require.NoError(t, err) + } else { + require.Error(t, err) + } + }) + } +} + +// TestQosCompute verifies the QoS object Compute() method works as expected for its two cases: normal +// and with configured block error probability +func TestQosCompute(t *testing.T) { + blockErrorProbability := sdk.OneDec() + qos := types.QualityOfServiceReport{Latency: sdk.OneDec(), Sync: sdk.OneDec(), Availability: sdk.OneDec()} + + // with the given QoS report and the default config, the expected score results: + // normal: score = latency + sync*syncFactor + ((1/availability) - 1) * FailureCost = 1 + 1*0.3 + (1/1 - 1) * 3 = 1.3 + // with block error probability: score = latency + blockErrorProbability * FailureCost + ((1/availability) - 1) * FailureCost = 1 + 1*3 + (1/1 - 1) * 3 = 4 + expectedScoreDefault := sdk.NewDecWithPrec(13, 1) + expectedScoreBlockErrorProbability := sdk.NewDec(4) + + template := []struct { + name string + opts []types.Option + expectedScore sdk.Dec + }{ + {name: "normal", opts: []types.Option{}, expectedScore: expectedScoreDefault}, + {name: "with block error probability", opts: []types.Option{types.WithBlockErrorProbability(blockErrorProbability)}, expectedScore: expectedScoreBlockErrorProbability}, + } + + for _, tt := range template { + t.Run(tt.name, func(t *testing.T) { + score, err := qos.ComputeQoSExcellence(tt.opts...) 
+// TestQosFailureCost checks that a higher failure cost means a worse score
+func TestQosFailureCost(t *testing.T) {
+ qos := types.QualityOfServiceReport{Latency: sdk.OneDec(), Sync: sdk.OneDec(), Availability: sdk.NewDecWithPrec(5, 1)}
+ failureCost, highFailureCost := int64(1), int64(3)
+
+ score, err := qos.ComputeQoSExcellence(types.WithFailureCost(failureCost))
+ require.NoError(t, err)
+ scoreHighFailure, err := qos.ComputeQoSExcellence(types.WithFailureCost(highFailureCost))
+ require.NoError(t, err)
+ require.True(t, scoreHighFailure.GT(score))
+
+ scoreWithProb, err := qos.ComputeQoSExcellence(types.WithFailureCost(failureCost), types.WithBlockErrorProbability(sdk.OneDec()))
+ require.NoError(t, err)
+ scoreHighFailureWithProb, err := qos.ComputeQoSExcellence(types.WithFailureCost(highFailureCost), types.WithBlockErrorProbability(sdk.OneDec()))
+ require.NoError(t, err)
+ require.True(t, scoreHighFailureWithProb.GT(scoreWithProb))
+}
+
+// TestQosSyncFactor checks that a higher syncFactor means a worse score
+func TestQosSyncFactor(t *testing.T) {
+ qos := types.QualityOfServiceReport{Latency: sdk.OneDec(), Sync: sdk.OneDec(), Availability: sdk.NewDecWithPrec(5, 1)}
+ syncFactor, highSyncFactor := sdk.NewDecWithPrec(5, 1), sdk.NewDecWithPrec(8, 1)
+
+ score, err := qos.ComputeQoSExcellence(types.WithSyncFactor(syncFactor))
+ require.NoError(t, err)
+ scoreHighSyncFactor, err := qos.ComputeQoSExcellence(types.WithSyncFactor(highSyncFactor))
+ require.NoError(t, err)
+ require.True(t, scoreHighSyncFactor.GT(score))
+}
+
+// TestQosStrategyFactor checks that the strategy factor works as expected
+// The strategy factor is an additional multiplier to the sync factor that is used
+// to weaken/strengthen the influence of the sync score compared to the latency
+func TestQosStrategyFactor(t *testing.T) {
+ // we configure availability = 1 to zero out the availability component of the score
+ qos := types.QualityOfServiceReport{Latency: sdk.OneDec(), Sync: sdk.OneDec(), Availability: sdk.OneDec()}
+
+ // we get the balancedScore with a balanced strategy and subtract the latency component of the balancedScore
+ // this way, our balancedScore will only be syncFactor*sync (syncFactor = configuredSyncFactor * strategyFactor)
+ balancedScore, err := qos.ComputeQoSExcellence(types.WithStrategyFactor(types.BalancedStrategyFactor))
+ require.NoError(t, err)
+ balancedScore = balancedScore.Sub(sdk.OneDec())
+
+ // calculate score with latency strategy - sync component should be smaller than the component in balancedScore
+ latencyScore, err := qos.ComputeQoSExcellence(types.WithStrategyFactor(types.LatencyStrategyFactor))
+ require.NoError(t, err)
+ latencyScore = latencyScore.Sub(sdk.OneDec())
+ require.True(t, balancedScore.GT(latencyScore))
+
+ // calculate score with sync freshness strategy - sync component should be bigger than the component in balancedScore
+ syncScore, err := qos.ComputeQoSExcellence(types.WithStrategyFactor(types.SyncFreshnessStrategyFactor))
+ require.NoError(t, err)
+ syncScore = syncScore.Sub(sdk.OneDec())
+ require.True(t, balancedScore.LT(syncScore))
+}
+
+// TestQosBlockErrorProbability checks that a larger block error probability means a worse score
+func TestQosBlockErrorProbability(t *testing.T) {
+ qos := types.QualityOfServiceReport{Latency: sdk.OneDec(), Sync: sdk.OneDec(), Availability: sdk.OneDec()}
+ probability, highProbability := sdk.NewDecWithPrec(5, 1), sdk.NewDecWithPrec(8, 1)
+
+ score, err := qos.ComputeQoSExcellence(types.WithBlockErrorProbability(probability))
+ require.NoError(t, err)
+ scoreHighProbability, err := qos.ComputeQoSExcellence(types.WithBlockErrorProbability(highProbability))
+ require.NoError(t, err)
+ require.True(t, scoreHighProbability.GT(score))
+}
+
+func TestQosReport(t *testing.T) {
+ qos1 := &types.QualityOfServiceReport{
+ Latency: sdk.MustNewDecFromStr("1.5"),
+ Availability: sdk.MustNewDecFromStr("1"),
+ Sync: sdk.MustNewDecFromStr("0.1"),
+ }
+ qos2 := &types.QualityOfServiceReport{
+ Latency: sdk.MustNewDecFromStr("0.2"),
+ Availability: sdk.MustNewDecFromStr("1"),
+ Sync: sdk.MustNewDecFromStr("0.1"),
+ }
+ qos3 := &types.QualityOfServiceReport{
+ Latency: sdk.MustNewDecFromStr("0.1"),
+ Availability: sdk.MustNewDecFromStr("1"),
+ Sync: sdk.MustNewDecFromStr("0.5"),
+ }
+ qos4 := &types.QualityOfServiceReport{
+ Latency: sdk.MustNewDecFromStr("0.1"),
+ Availability: sdk.MustNewDecFromStr("0.5"),
+ Sync: sdk.MustNewDecFromStr("0.5"),
+ }
+
+ qos1Res, errQos1 := qos1.ComputeQoSExcellence()
+ qos2Res, errQos2 := qos2.ComputeQoSExcellence()
+ qos3Res, errQos3 := qos3.ComputeQoSExcellence()
+ qos4Res, errQos4 := qos4.ComputeQoSExcellence()
+ require.NoError(t, errQos1)
+ require.NoError(t, errQos2)
+ require.NoError(t, errQos3)
+ require.NoError(t, errQos4)
+ require.True(t, qos1Res.GT(qos2Res))
+ require.True(t, qos1Res.GT(qos3Res))
+ require.True(t, qos4Res.GT(qos1Res))
+
+ require.True(t, qos2Res.LT(qos3Res))
+ require.True(t, qos2Res.LT(qos4Res))
+
+ require.True(t, qos4Res.GT(qos3Res))
+}
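As a sanity check on the assertions in TestQosReport (illustrative only, not part of the diff), evaluating the four reports with the documented default parameters (sync factor 0.3, strategy factor 1, failure cost 3) gives approximately:

// qos1: 1.5 + 0.1*0.3 + (1/1 - 1)*3   = 1.53
// qos2: 0.2 + 0.1*0.3 + (1/1 - 1)*3   = 0.23
// qos3: 0.1 + 0.5*0.3 + (1/1 - 1)*3   = 0.25
// qos4: 0.1 + 0.5*0.3 + (1/0.5 - 1)*3 = 3.25

Since this excellence score behaves as a cost (lower is better), qos2 scores best and qos4 worst, which matches every GT/LT assertion in the test above.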