From d133bad2a112d706da0d77a083acb6c28eda835f Mon Sep 17 00:00:00 2001 From: Gregory Markou Date: Thu, 22 Apr 2021 12:22:50 -0400 Subject: [PATCH 1/8] remove outliers in gpo calc --- eth/gasprice/gasprice.go | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/eth/gasprice/gasprice.go b/eth/gasprice/gasprice.go index 5d8be08e0b78..fab9ea74e24c 100644 --- a/eth/gasprice/gasprice.go +++ b/eth/gasprice/gasprice.go @@ -29,7 +29,9 @@ import ( "github.com/ethereum/go-ethereum/rpc" ) -const sampleNumber = 3 // Number of transactions sampled in a block +// Number of transactions sampled in a block +// Roughly the last 5 minutes +const sampleNumber = 20 var DefaultMaxPrice = big.NewInt(500 * params.GWei) @@ -207,12 +209,44 @@ func (gpo *Oracle) getBlockPrices(ctx context.Context, signer types.Signer, bloc } } } + if len(prices) > 0 { + prices = removeOutliers(prices) + } select { case result <- getBlockPricesResult{prices, nil}: case <-quit: } } +// removeOutliers calculates the IQR to gasPrices that are significant outliers +// This helps remove edgecases where MEV skew the gasPrices +// Assumes len(prices) != 0, and prices are sorted +func removeOutliers(prices []*big.Int) []*big.Int { + var ( + mean *big.Int + variance *big.Int + sd *big.Int + sum = big.NewInt(0) + sumsq = big.NewInt(0) + length = big.NewInt(int64(len(prices))) + deviations = big.NewInt(3) // The max number of std from the mean we will accept + ) + for _, price := range prices { + sum.Add(sum, price) + sumsq.Add(sum, price.Mul(price, price)) + } + mean = sum.Div(sum, length) + variance = sumsq.Sub(sumsq.Div(sumsq, length), mean.Mul(mean, mean)) + sd = variance.Sqrt(variance) + var filtered = make([]*big.Int, 0) + for _, price := range prices { + if price.Cmp(sd.Mul(sd, mean.Sub(mean, deviations))) == 1 && price.Cmp(sd.Mul(sd, mean.Add(mean, deviations))) == -1 { + filtered = append(filtered, price) + } + } + return filtered +} + type bigIntArray []*big.Int func (s bigIntArray) Len() int { return len(s) } From 963537970cdd3e91d72536229d618269061c7911 Mon Sep 17 00:00:00 2001 From: Gregory Markou Date: Thu, 22 Apr 2021 12:45:52 -0400 Subject: [PATCH 2/8] add sort and test --- eth/gasprice/gasprice.go | 3 ++- eth/gasprice/gasprice_test.go | 21 +++++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/eth/gasprice/gasprice.go b/eth/gasprice/gasprice.go index fab9ea74e24c..2e540372d886 100644 --- a/eth/gasprice/gasprice.go +++ b/eth/gasprice/gasprice.go @@ -220,7 +220,7 @@ func (gpo *Oracle) getBlockPrices(ctx context.Context, signer types.Signer, bloc // removeOutliers calculates the IQR to gasPrices that are significant outliers // This helps remove edgecases where MEV skew the gasPrices -// Assumes len(prices) != 0, and prices are sorted +// Assumes len(prices) != 0 func removeOutliers(prices []*big.Int) []*big.Int { var ( mean *big.Int @@ -231,6 +231,7 @@ func removeOutliers(prices []*big.Int) []*big.Int { length = big.NewInt(int64(len(prices))) deviations = big.NewInt(3) // The max number of std from the mean we will accept ) + sort.Sort(bigIntArray(prices)) for _, price := range prices { sum.Add(sum, price) sumsq.Add(sum, price.Mul(price, price)) diff --git a/eth/gasprice/gasprice_test.go b/eth/gasprice/gasprice_test.go index 4fd2df10e2bd..cf53b7f6fa50 100644 --- a/eth/gasprice/gasprice_test.go +++ b/eth/gasprice/gasprice_test.go @@ -20,7 +20,9 @@ import ( "context" "math" "math/big" + "math/rand" "testing" + "time" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/consensus/ethash" @@ -116,3 +118,22 @@ func TestSuggestPrice(t *testing.T) { t.Fatalf("Gas price mismatch, want %d, got %d", expect, got) } } + +func TestRemoveOutliers(t *testing.T) { + rand.Seed(time.Now().UnixNano()) + min := 180 + max := 220 + gasPrices := make([]*big.Int, 0) + for i := 0; i < 300; i++ { + randGasPrice := rand.Intn(max-min+1) + min + gasPrices = append(gasPrices, big.NewInt(int64(randGasPrice))) + } + cpy := make([]*big.Int, len(gasPrices)) + // add low gas prices + cpy = append(cpy, big.NewInt(5)) + cpy = append(cpy, big.NewInt(10)) + cpy = append(cpy, big.NewInt(15)) + + res := removeOutliers(cpy) + t.Fatalf("Low gas prices not removed, want %d, got %d", gasPrices, res) +} From 4716b141daebdf52ef207d897f45fb93e5068bee Mon Sep 17 00:00:00 2001 From: Gregory Markou Date: Thu, 22 Apr 2021 12:47:19 -0400 Subject: [PATCH 3/8] udpdate comment --- eth/gasprice/gasprice.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/eth/gasprice/gasprice.go b/eth/gasprice/gasprice.go index 2e540372d886..ad0b2af0f268 100644 --- a/eth/gasprice/gasprice.go +++ b/eth/gasprice/gasprice.go @@ -218,9 +218,9 @@ func (gpo *Oracle) getBlockPrices(ctx context.Context, signer types.Signer, bloc } } -// removeOutliers calculates the IQR to gasPrices that are significant outliers -// This helps remove edgecases where MEV skew the gasPrices -// Assumes len(prices) != 0 +// removeOutliers calculates the interquartile range of gas prices that are +// significant outliers with the goal of removing edge cases where MEV skews +// the gas prices func removeOutliers(prices []*big.Int) []*big.Int { var ( mean *big.Int From adbaeaa9def0d6a56e9621de75941d75b061394a Mon Sep 17 00:00:00 2001 From: Gregory Markou Date: Thu, 22 Apr 2021 13:43:35 -0400 Subject: [PATCH 4/8] wip --- eth/gasprice/gasprice.go | 14 +++++++++----- eth/gasprice/gasprice_test.go | 1 + 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/eth/gasprice/gasprice.go b/eth/gasprice/gasprice.go index ad0b2af0f268..5987d18c57fa 100644 --- a/eth/gasprice/gasprice.go +++ b/eth/gasprice/gasprice.go @@ -221,7 +221,11 @@ func (gpo *Oracle) getBlockPrices(ctx context.Context, signer types.Signer, bloc // removeOutliers calculates the interquartile range of gas prices that are // significant outliers with the goal of removing edge cases where MEV skews // the gas prices -func removeOutliers(prices []*big.Int) []*big.Int { +func removeOutliers(gasPrices []*big.Int) []*big.Int { + prices := make([]*big.Int, len(gasPrices)) + copy(prices, gasPrices) + sort.Sort(bigIntArray(prices)) + var ( mean *big.Int variance *big.Int @@ -231,17 +235,17 @@ func removeOutliers(prices []*big.Int) []*big.Int { length = big.NewInt(int64(len(prices))) deviations = big.NewInt(3) // The max number of std from the mean we will accept ) - sort.Sort(bigIntArray(prices)) for _, price := range prices { sum.Add(sum, price) sumsq.Add(sum, price.Mul(price, price)) } - mean = sum.Div(sum, length) - variance = sumsq.Sub(sumsq.Div(sumsq, length), mean.Mul(mean, mean)) + mean = big.NewInt(0).Div(sum, length) + variance = big.NewInt(0).Abs(big.NewInt(0).Sub(big.NewInt(0).Div(sumsq, length), big.NewInt(0).Mul(mean, mean))) sd = variance.Sqrt(variance) var filtered = make([]*big.Int, 0) - for _, price := range prices { + for i, price := range prices { if price.Cmp(sd.Mul(sd, mean.Sub(mean, deviations))) == 1 && price.Cmp(sd.Mul(sd, mean.Add(mean, deviations))) == -1 { + println("heer", i) filtered = append(filtered, price) } } diff --git a/eth/gasprice/gasprice_test.go b/eth/gasprice/gasprice_test.go index cf53b7f6fa50..2b184ca83786 100644 --- a/eth/gasprice/gasprice_test.go +++ b/eth/gasprice/gasprice_test.go @@ -129,6 +129,7 @@ func TestRemoveOutliers(t *testing.T) { gasPrices = append(gasPrices, big.NewInt(int64(randGasPrice))) } cpy := make([]*big.Int, len(gasPrices)) + copy(cpy, gasPrices) // add low gas prices cpy = append(cpy, big.NewInt(5)) cpy = append(cpy, big.NewInt(10)) From 0d7e77b457434b61f96aef5f33880cacd5e5f018 Mon Sep 17 00:00:00 2001 From: Gregory Markou Date: Thu, 22 Apr 2021 14:57:06 -0400 Subject: [PATCH 5/8] cleanup formula --- eth/gasprice/gasprice.go | 16 ++++++++++------ eth/gasprice/gasprice_test.go | 4 +++- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/eth/gasprice/gasprice.go b/eth/gasprice/gasprice.go index 5987d18c57fa..2b8cf9aafe0c 100644 --- a/eth/gasprice/gasprice.go +++ b/eth/gasprice/gasprice.go @@ -228,8 +228,8 @@ func removeOutliers(gasPrices []*big.Int) []*big.Int { var ( mean *big.Int - variance *big.Int sd *big.Int + variance = big.NewInt(0) sum = big.NewInt(0) sumsq = big.NewInt(0) length = big.NewInt(int64(len(prices))) @@ -237,18 +237,22 @@ func removeOutliers(gasPrices []*big.Int) []*big.Int { ) for _, price := range prices { sum.Add(sum, price) - sumsq.Add(sum, price.Mul(price, price)) + sumsq.Add(sum, big.NewInt(0).Mul(price, price)) } mean = big.NewInt(0).Div(sum, length) - variance = big.NewInt(0).Abs(big.NewInt(0).Sub(big.NewInt(0).Div(sumsq, length), big.NewInt(0).Mul(mean, mean))) - sd = variance.Sqrt(variance) + for _, price := range prices { + x := big.NewInt(0).Sub(price, mean) + sqr := big.NewInt(0).Mul(x, x) + variance.Add(variance, sqr) + } + sd = variance.Sqrt(big.NewInt(0).Div(variance, length)) var filtered = make([]*big.Int, 0) - for i, price := range prices { + for _, price := range prices { if price.Cmp(sd.Mul(sd, mean.Sub(mean, deviations))) == 1 && price.Cmp(sd.Mul(sd, mean.Add(mean, deviations))) == -1 { - println("heer", i) filtered = append(filtered, price) } } + println(len(filtered)) return filtered } diff --git a/eth/gasprice/gasprice_test.go b/eth/gasprice/gasprice_test.go index 2b184ca83786..b0b2fbf03693 100644 --- a/eth/gasprice/gasprice_test.go +++ b/eth/gasprice/gasprice_test.go @@ -136,5 +136,7 @@ func TestRemoveOutliers(t *testing.T) { cpy = append(cpy, big.NewInt(15)) res := removeOutliers(cpy) - t.Fatalf("Low gas prices not removed, want %d, got %d", gasPrices, res) + if len(gasPrices) < len(res) && len(res) == 0 { + t.Fatalf("Low gas prices not removed, want length less than %d, got %d", len(gasPrices), len(res)) + } } From dbe5de192adff06c766926f54106b58dfe954898 Mon Sep 17 00:00:00 2001 From: Gregory Markou Date: Thu, 22 Apr 2021 17:12:22 -0400 Subject: [PATCH 6/8] cleanup comments and variables --- eth/gasprice/gasprice.go | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/eth/gasprice/gasprice.go b/eth/gasprice/gasprice.go index 2b8cf9aafe0c..45ed3a801fa0 100644 --- a/eth/gasprice/gasprice.go +++ b/eth/gasprice/gasprice.go @@ -228,19 +228,21 @@ func removeOutliers(gasPrices []*big.Int) []*big.Int { var ( mean *big.Int - sd *big.Int + sd *big.Int // Standard deviation variance = big.NewInt(0) sum = big.NewInt(0) - sumsq = big.NewInt(0) + sumsq = big.NewInt(0) // The sums squared length = big.NewInt(int64(len(prices))) deviations = big.NewInt(3) // The max number of std from the mean we will accept ) for _, price := range prices { + // Calculate sum and sumsq sum.Add(sum, price) sumsq.Add(sum, big.NewInt(0).Mul(price, price)) } mean = big.NewInt(0).Div(sum, length) for _, price := range prices { + // Calculate the variance sum(x - mean)^2 x := big.NewInt(0).Sub(price, mean) sqr := big.NewInt(0).Mul(x, x) variance.Add(variance, sqr) @@ -248,11 +250,11 @@ func removeOutliers(gasPrices []*big.Int) []*big.Int { sd = variance.Sqrt(big.NewInt(0).Div(variance, length)) var filtered = make([]*big.Int, 0) for _, price := range prices { + // Remove items that are not within the upper and lower bounds of the deviation if price.Cmp(sd.Mul(sd, mean.Sub(mean, deviations))) == 1 && price.Cmp(sd.Mul(sd, mean.Add(mean, deviations))) == -1 { filtered = append(filtered, price) } } - println(len(filtered)) return filtered } From 014804be850adccc779133c54e26b56a429b358c Mon Sep 17 00:00:00 2001 From: Gregory Markou Date: Thu, 22 Apr 2021 17:32:35 -0400 Subject: [PATCH 7/8] update per comments --- eth/gasprice/gasprice.go | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/eth/gasprice/gasprice.go b/eth/gasprice/gasprice.go index 45ed3a801fa0..8054623efec1 100644 --- a/eth/gasprice/gasprice.go +++ b/eth/gasprice/gasprice.go @@ -221,24 +221,18 @@ func (gpo *Oracle) getBlockPrices(ctx context.Context, signer types.Signer, bloc // removeOutliers calculates the interquartile range of gas prices that are // significant outliers with the goal of removing edge cases where MEV skews // the gas prices -func removeOutliers(gasPrices []*big.Int) []*big.Int { - prices := make([]*big.Int, len(gasPrices)) - copy(prices, gasPrices) - sort.Sort(bigIntArray(prices)) - +func removeOutliers(prices []*big.Int) []*big.Int { var ( mean *big.Int sd *big.Int // Standard deviation variance = big.NewInt(0) sum = big.NewInt(0) - sumsq = big.NewInt(0) // The sums squared length = big.NewInt(int64(len(prices))) - deviations = big.NewInt(3) // The max number of std from the mean we will accept + deviations = big.NewInt(3) // The max acceptable std, anything outsid will be removed from the set ) for _, price := range prices { - // Calculate sum and sumsq + // Calculate sum sum.Add(sum, price) - sumsq.Add(sum, big.NewInt(0).Mul(price, price)) } mean = big.NewInt(0).Div(sum, length) for _, price := range prices { @@ -248,10 +242,12 @@ func removeOutliers(gasPrices []*big.Int) []*big.Int { variance.Add(variance, sqr) } sd = variance.Sqrt(big.NewInt(0).Div(variance, length)) - var filtered = make([]*big.Int, 0) + filtered := []*big.Int{} + upperBound := big.NewInt(0).Sub(mean, deviations) + lowerBound := big.NewInt(0).Add(mean, deviations) for _, price := range prices { // Remove items that are not within the upper and lower bounds of the deviation - if price.Cmp(sd.Mul(sd, mean.Sub(mean, deviations))) == 1 && price.Cmp(sd.Mul(sd, mean.Add(mean, deviations))) == -1 { + if price.Cmp(sd.Mul(sd, lowerBound)) == 1 && price.Cmp(sd.Mul(sd, upperBound)) == -1 { filtered = append(filtered, price) } } From 647b2030a0923a1bfb3402250f0cc264948515b7 Mon Sep 17 00:00:00 2001 From: Gregory Markou Date: Thu, 22 Apr 2021 18:04:31 -0400 Subject: [PATCH 8/8] fix formulas, add tests --- eth/gasprice/gasprice.go | 20 +++++++---- eth/gasprice/gasprice_test.go | 66 ++++++++++++++++++++++++++++++++--- 2 files changed, 75 insertions(+), 11 deletions(-) diff --git a/eth/gasprice/gasprice.go b/eth/gasprice/gasprice.go index 8054623efec1..8785b59e5a60 100644 --- a/eth/gasprice/gasprice.go +++ b/eth/gasprice/gasprice.go @@ -235,19 +235,25 @@ func removeOutliers(prices []*big.Int) []*big.Int { sum.Add(sum, price) } mean = big.NewInt(0).Div(sum, length) + // Calculate variance (sum(x - mean)^2 )/ length for _, price := range prices { - // Calculate the variance sum(x - mean)^2 + // Calculate the summation x := big.NewInt(0).Sub(price, mean) - sqr := big.NewInt(0).Mul(x, x) - variance.Add(variance, sqr) + square := big.NewInt(0).Mul(x, x) + variance.Add(variance, square) } - sd = variance.Sqrt(big.NewInt(0).Div(variance, length)) + variance.Div(variance, length) + // Calculate standard deviation from the variance + sd = big.NewInt(0).Sqrt(variance) + filtered := []*big.Int{} - upperBound := big.NewInt(0).Sub(mean, deviations) - lowerBound := big.NewInt(0).Add(mean, deviations) + deviation := big.NewInt(0).Mul(deviations, sd) + lowerBound := big.NewInt(0).Sub(mean, deviation) + upperBound := big.NewInt(0).Add(mean, deviation) + for _, price := range prices { // Remove items that are not within the upper and lower bounds of the deviation - if price.Cmp(sd.Mul(sd, lowerBound)) == 1 && price.Cmp(sd.Mul(sd, upperBound)) == -1 { + if price.Cmp(lowerBound) == 1 && price.Cmp(upperBound) == -1 { filtered = append(filtered, price) } } diff --git a/eth/gasprice/gasprice_test.go b/eth/gasprice/gasprice_test.go index b0b2fbf03693..6df15ae0a269 100644 --- a/eth/gasprice/gasprice_test.go +++ b/eth/gasprice/gasprice_test.go @@ -119,24 +119,82 @@ func TestSuggestPrice(t *testing.T) { } } -func TestRemoveOutliers(t *testing.T) { +func generateFakeGasPrices(min, max int) []*big.Int { rand.Seed(time.Now().UnixNano()) - min := 180 - max := 220 gasPrices := make([]*big.Int, 0) for i := 0; i < 300; i++ { randGasPrice := rand.Intn(max-min+1) + min gasPrices = append(gasPrices, big.NewInt(int64(randGasPrice))) } + return gasPrices +} + +func TestRemoveLowOutliers(t *testing.T) { + gasPrices := generateFakeGasPrices(180, 220) cpy := make([]*big.Int, len(gasPrices)) copy(cpy, gasPrices) // add low gas prices cpy = append(cpy, big.NewInt(5)) cpy = append(cpy, big.NewInt(10)) + cpy = append(cpy, big.NewInt(10)) + cpy = append(cpy, big.NewInt(10)) + cpy = append(cpy, big.NewInt(15)) + cpy = append(cpy, big.NewInt(15)) + cpy = append(cpy, big.NewInt(15)) cpy = append(cpy, big.NewInt(15)) res := removeOutliers(cpy) - if len(gasPrices) < len(res) && len(res) == 0 { + // It should remove all the lower gasPrices in the extreme case + if len(gasPrices) != len(res) { + t.Fatalf("Low gas prices not removed, want length less than %d, got %d", len(gasPrices), len(res)) + } +} + +func TestRemoveHighOutliars(t *testing.T) { + gasPrices := generateFakeGasPrices(180, 220) + cpy := make([]*big.Int, len(gasPrices)) + copy(cpy, gasPrices) + // add low gas prices + cpy = append(cpy, big.NewInt(300)) + cpy = append(cpy, big.NewInt(310)) + cpy = append(cpy, big.NewInt(350)) + cpy = append(cpy, big.NewInt(250)) + cpy = append(cpy, big.NewInt(251)) + cpy = append(cpy, big.NewInt(245)) + cpy = append(cpy, big.NewInt(255)) + cpy = append(cpy, big.NewInt(256)) + + res := removeOutliers(cpy) + // It should remove most of the higher values + if len(cpy) < len(res) { + t.Fatalf("Low gas prices not removed, want length less than %d, got %d", len(gasPrices), len(res)) + } +} + +func TestRemoveHighAndLowOutliars(t *testing.T) { + gasPrices := generateFakeGasPrices(180, 220) + cpy := make([]*big.Int, len(gasPrices)) + copy(cpy, gasPrices) + // add low gas prices + cpy = append(cpy, big.NewInt(300)) + cpy = append(cpy, big.NewInt(310)) + cpy = append(cpy, big.NewInt(350)) + cpy = append(cpy, big.NewInt(250)) + cpy = append(cpy, big.NewInt(251)) + cpy = append(cpy, big.NewInt(245)) + cpy = append(cpy, big.NewInt(255)) + cpy = append(cpy, big.NewInt(256)) + cpy = append(cpy, big.NewInt(50)) + cpy = append(cpy, big.NewInt(100)) + cpy = append(cpy, big.NewInt(70)) + cpy = append(cpy, big.NewInt(75)) + cpy = append(cpy, big.NewInt(5)) + cpy = append(cpy, big.NewInt(0)) + cpy = append(cpy, big.NewInt(4)) + cpy = append(cpy, big.NewInt(2)) + res := removeOutliers(cpy) + // It should remove most of the higher values + if len(cpy) < len(res) { t.Fatalf("Low gas prices not removed, want length less than %d, got %d", len(gasPrices), len(res)) } }