From 43e27148334344b652ee6bdf520776be5c692dc0 Mon Sep 17 00:00:00 2001 From: EvanJRichard Date: Tue, 12 Dec 2023 14:11:26 -0500 Subject: [PATCH 01/41] op-service: Add ActiveL2EndpointProvider. --- op-service/dial/active_l2_provider.go | 161 ++++++++++++++++++++++++++ 1 file changed, 161 insertions(+) create mode 100644 op-service/dial/active_l2_provider.go diff --git a/op-service/dial/active_l2_provider.go b/op-service/dial/active_l2_provider.go new file mode 100644 index 000000000000..1a6d957d0d8b --- /dev/null +++ b/op-service/dial/active_l2_provider.go @@ -0,0 +1,161 @@ +package dial + +import ( + "context" + "errors" + "fmt" + "sync" + "time" + + "github.com/ethereum-optimism/optimism/op-service/sources" + "github.com/ethereum/go-ethereum/ethclient" + "github.com/ethereum/go-ethereum/log" +) + +type ActiveL2EndpointProvider struct { + endpoints []endpointUrls + checkDuration time.Duration + networkTimeout time.Duration + log log.Logger + + idx int + activeTimeout time.Time + + currentEthClient *ethclient.Client + currentRollupClient *sources.RollupClient + clientLock *sync.Mutex +} + +type endpointUrls struct { + ethUrl string + rollupUrl string +} + +func NewActiveL2EndpointProvider( + ethUrls, rollupUrls []string, + checkDuration time.Duration, + networkTimeout time.Duration, + logger log.Logger, +) (*ActiveL2EndpointProvider, error) { + if len(ethUrls) == 0 || len(rollupUrls) == 0 { + return nil, errors.New("empty urls list") + } + if len(ethUrls) != len(rollupUrls) { + return nil, errors.New("number of eth and rollup urls mismatch") + } + + n := len(ethUrls) + eps := make([]endpointUrls, 0, n) + for i := 0; i < n; i++ { + eps = append(eps, endpointUrls{ + ethUrl: ethUrls[0], + rollupUrl: rollupUrls[0], + }) + } + + return &ActiveL2EndpointProvider{ + endpoints: eps, + checkDuration: checkDuration, + networkTimeout: networkTimeout, + log: logger, + }, nil +} + +func (p *ActiveL2EndpointProvider) EthClient(ctx context.Context) (*ethclient.Client, error) { + err := p.ensureActiveEndpoint(ctx) + if err != nil { + return nil, err + } + p.clientLock.Lock() + defer p.clientLock.Unlock() + return p.currentEthClient, nil +} + +func (p *ActiveL2EndpointProvider) RollupClient(ctx context.Context) (*sources.RollupClient, error) { + err := p.ensureActiveEndpoint(ctx) + if err != nil { + return nil, err + } + p.clientLock.Lock() + defer p.clientLock.Unlock() + return p.currentRollupClient, nil +} + +func (p *ActiveL2EndpointProvider) ensureActiveEndpoint(ctx context.Context) error { + if !p.shouldCheck() { + return nil + } + + if err := p.findActiveEndpoints(ctx); err != nil { + return err + } + p.activeTimeout = time.Now().Add(p.checkDuration) + return nil +} + +func (p *ActiveL2EndpointProvider) shouldCheck() bool { + return time.Now().After(p.activeTimeout) +} + +func (p *ActiveL2EndpointProvider) findActiveEndpoints(ctx context.Context) error { + // If current is not active, dial new sequencers until finding an active one. + ts := time.Now() + for i := 0; ; i++ { + active, err := p.checkCurrentSequencer(ctx) + if err != nil { + if ctx.Err() != nil { + p.log.Warn("Error querying active sequencer, trying next.", "err", err, "try", i) + return fmt.Errorf("querying active sequencer: %w", err) + } + p.log.Warn("Error querying active sequencer, trying next.", "err", err, "try", i) + } else if active { + p.log.Debug("Current sequencer active.", "try", i) + return nil + } else { + p.log.Info("Current sequencer inactive, trying next.", "try", i) + } + + // After iterating over all endpoints, sleep if all were just inactive, + // to avoid spamming the sequencers in a loop. + if (i+1)%p.NumEndpoints() == 0 { + d := ts.Add(p.checkDuration).Sub(time.Now()) + time.Sleep(d) // accepts negative + ts = time.Now() + } + + if err := p.dialNextSequencer(ctx, i); err != nil { + return fmt.Errorf("dialing next sequencer: %w", err) + } + } +} + +func (p *ActiveL2EndpointProvider) checkCurrentSequencer(ctx context.Context) (bool, error) { + cctx, cancel := context.WithTimeout(ctx, p.networkTimeout) + defer cancel() + p.clientLock.Lock() + defer p.clientLock.Unlock() + return p.currentRollupClient.SequencerActive(cctx) +} + +func (p *ActiveL2EndpointProvider) dialNextSequencer(ctx context.Context, idx int) error { + cctx, cancel := context.WithTimeout(ctx, p.networkTimeout) + defer cancel() + ep := p.endpoints[idx] + + ethClient, err := DialEthClientWithTimeout(cctx, p.networkTimeout, p.log, ep.ethUrl) + if err != nil { + return fmt.Errorf("dialing eth client: %w", err) + } + rollupClient, err := DialRollupClientWithTimeout(cctx, p.networkTimeout, p.log, ep.rollupUrl) + if err != nil { + return fmt.Errorf("dialing rollup client: %w", err) + } + p.clientLock.Lock() + defer p.clientLock.Unlock() + p.currentEthClient, p.currentRollupClient = ethClient, rollupClient + return nil +} + +func (p *ActiveL2EndpointProvider) NumEndpoints() int { + return len(p.endpoints) +} From 7ac1fda99bdef68995face3dcbc201cafd68c2e1 Mon Sep 17 00:00:00 2001 From: EvanJRichard Date: Tue, 12 Dec 2023 14:26:19 -0500 Subject: [PATCH 02/41] Fix bug in initialization, and handle case where no ethUrls are provided. --- op-service/dial/active_l2_provider.go | 39 ++++++++++++++++++++------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/op-service/dial/active_l2_provider.go b/op-service/dial/active_l2_provider.go index 1a6d957d0d8b..aefa0517b911 100644 --- a/op-service/dial/active_l2_provider.go +++ b/op-service/dial/active_l2_provider.go @@ -37,19 +37,24 @@ func NewActiveL2EndpointProvider( networkTimeout time.Duration, logger log.Logger, ) (*ActiveL2EndpointProvider, error) { - if len(ethUrls) == 0 || len(rollupUrls) == 0 { - return nil, errors.New("empty urls list") + if len(rollupUrls) == 0 { + return nil, errors.New("empty rollup urls list") } - if len(ethUrls) != len(rollupUrls) { - return nil, errors.New("number of eth and rollup urls mismatch") + useEthClients := len(ethUrls) > 0 + if useEthClients && len(ethUrls) != len(rollupUrls) { + return nil, errors.New("eth urls provided, but number of eth urls and rollup urls mismatch") + } + + n := len(rollupUrls) + if !useEthClients { + ethUrls = make([]string, n) } - n := len(ethUrls) eps := make([]endpointUrls, 0, n) for i := 0; i < n; i++ { eps = append(eps, endpointUrls{ - ethUrl: ethUrls[0], - rollupUrl: rollupUrls[0], + ethUrl: ethUrls[i], + rollupUrl: rollupUrls[i], }) } @@ -142,10 +147,15 @@ func (p *ActiveL2EndpointProvider) dialNextSequencer(ctx context.Context, idx in defer cancel() ep := p.endpoints[idx] - ethClient, err := DialEthClientWithTimeout(cctx, p.networkTimeout, p.log, ep.ethUrl) - if err != nil { - return fmt.Errorf("dialing eth client: %w", err) + var ethClient *ethclient.Client + var err error + if ep.ethUrl != "" { + ethClient, err = DialEthClientWithTimeout(cctx, p.networkTimeout, p.log, ep.ethUrl) + if err != nil { + return fmt.Errorf("dialing eth client: %w", err) + } } + rollupClient, err := DialRollupClientWithTimeout(cctx, p.networkTimeout, p.log, ep.rollupUrl) if err != nil { return fmt.Errorf("dialing rollup client: %w", err) @@ -159,3 +169,12 @@ func (p *ActiveL2EndpointProvider) dialNextSequencer(ctx context.Context, idx in func (p *ActiveL2EndpointProvider) NumEndpoints() int { return len(p.endpoints) } + +func (p *ActiveL2EndpointProvider) Close() { + if p.currentEthClient != nil { + p.currentEthClient.Close() + } + if p.currentRollupClient != nil { + p.currentRollupClient.Close() + } +} From a233a87bdc3c3018d39a3868b8781dfc5dfd7ceb Mon Sep 17 00:00:00 2001 From: EvanJRichard Date: Tue, 12 Dec 2023 14:53:42 -0500 Subject: [PATCH 03/41] Split active L2 provider into active rollup and active L2 provider. --- op-service/dial/active_l2_provider.go | 122 ++++---------------- op-service/dial/active_rollup_provider.go | 133 ++++++++++++++++++++++ 2 files changed, 155 insertions(+), 100 deletions(-) create mode 100644 op-service/dial/active_rollup_provider.go diff --git a/op-service/dial/active_l2_provider.go b/op-service/dial/active_l2_provider.go index aefa0517b911..e202a259db2f 100644 --- a/op-service/dial/active_l2_provider.go +++ b/op-service/dial/active_l2_provider.go @@ -4,7 +4,6 @@ import ( "context" "errors" "fmt" - "sync" "time" "github.com/ethereum-optimism/optimism/op-service/sources" @@ -13,22 +12,9 @@ import ( ) type ActiveL2EndpointProvider struct { - endpoints []endpointUrls - checkDuration time.Duration - networkTimeout time.Duration - log log.Logger - - idx int - activeTimeout time.Time - - currentEthClient *ethclient.Client - currentRollupClient *sources.RollupClient - clientLock *sync.Mutex -} - -type endpointUrls struct { - ethUrl string - rollupUrl string + ActiveL2RollupProvider + ethEndpoints []string + currentEthClient *ethclient.Client } func NewActiveL2EndpointProvider( @@ -40,29 +26,18 @@ func NewActiveL2EndpointProvider( if len(rollupUrls) == 0 { return nil, errors.New("empty rollup urls list") } - useEthClients := len(ethUrls) > 0 - if useEthClients && len(ethUrls) != len(rollupUrls) { - return nil, errors.New("eth urls provided, but number of eth urls and rollup urls mismatch") + if len(ethUrls) != len(rollupUrls) { + return nil, errors.New("number of eth urls and rollup urls mismatch") } - n := len(rollupUrls) - if !useEthClients { - ethUrls = make([]string, n) - } - - eps := make([]endpointUrls, 0, n) - for i := 0; i < n; i++ { - eps = append(eps, endpointUrls{ - ethUrl: ethUrls[i], - rollupUrl: rollupUrls[i], - }) + rollupProvider, err := NewActiveL2RollupProvider(rollupUrls, checkDuration, networkTimeout, logger) + if err != nil { + return nil, err } return &ActiveL2EndpointProvider{ - endpoints: eps, - checkDuration: checkDuration, - networkTimeout: networkTimeout, - log: logger, + ActiveL2RollupProvider: *rollupProvider, + ethEndpoints: ethUrls, }, nil } @@ -77,86 +52,35 @@ func (p *ActiveL2EndpointProvider) EthClient(ctx context.Context) (*ethclient.Cl } func (p *ActiveL2EndpointProvider) RollupClient(ctx context.Context) (*sources.RollupClient, error) { - err := p.ensureActiveEndpoint(ctx) - if err != nil { - return nil, err - } - p.clientLock.Lock() - defer p.clientLock.Unlock() - return p.currentRollupClient, nil + return p.ActiveL2RollupProvider.RollupClient(ctx) } func (p *ActiveL2EndpointProvider) ensureActiveEndpoint(ctx context.Context) error { - if !p.shouldCheck() { - return nil - } - - if err := p.findActiveEndpoints(ctx); err != nil { - return err - } - p.activeTimeout = time.Now().Add(p.checkDuration) - return nil + return p.ActiveL2RollupProvider.ensureActiveEndpoint(ctx) } func (p *ActiveL2EndpointProvider) shouldCheck() bool { - return time.Now().After(p.activeTimeout) + return p.ActiveL2RollupProvider.shouldCheck() } func (p *ActiveL2EndpointProvider) findActiveEndpoints(ctx context.Context) error { - // If current is not active, dial new sequencers until finding an active one. - ts := time.Now() - for i := 0; ; i++ { - active, err := p.checkCurrentSequencer(ctx) - if err != nil { - if ctx.Err() != nil { - p.log.Warn("Error querying active sequencer, trying next.", "err", err, "try", i) - return fmt.Errorf("querying active sequencer: %w", err) - } - p.log.Warn("Error querying active sequencer, trying next.", "err", err, "try", i) - } else if active { - p.log.Debug("Current sequencer active.", "try", i) - return nil - } else { - p.log.Info("Current sequencer inactive, trying next.", "try", i) - } - - // After iterating over all endpoints, sleep if all were just inactive, - // to avoid spamming the sequencers in a loop. - if (i+1)%p.NumEndpoints() == 0 { - d := ts.Add(p.checkDuration).Sub(time.Now()) - time.Sleep(d) // accepts negative - ts = time.Now() - } - - if err := p.dialNextSequencer(ctx, i); err != nil { - return fmt.Errorf("dialing next sequencer: %w", err) - } - } + return p.ActiveL2RollupProvider.findActiveEndpoints(ctx) } func (p *ActiveL2EndpointProvider) checkCurrentSequencer(ctx context.Context) (bool, error) { - cctx, cancel := context.WithTimeout(ctx, p.networkTimeout) - defer cancel() - p.clientLock.Lock() - defer p.clientLock.Unlock() - return p.currentRollupClient.SequencerActive(cctx) + return p.ActiveL2RollupProvider.checkCurrentSequencer(ctx) } func (p *ActiveL2EndpointProvider) dialNextSequencer(ctx context.Context, idx int) error { cctx, cancel := context.WithTimeout(ctx, p.networkTimeout) defer cancel() - ep := p.endpoints[idx] - - var ethClient *ethclient.Client - var err error - if ep.ethUrl != "" { - ethClient, err = DialEthClientWithTimeout(cctx, p.networkTimeout, p.log, ep.ethUrl) - if err != nil { - return fmt.Errorf("dialing eth client: %w", err) - } + + ethClient, err := DialEthClientWithTimeout(cctx, p.networkTimeout, p.log, p.ethEndpoints[idx]) + if err != nil { + return fmt.Errorf("dialing eth client: %w", err) } - rollupClient, err := DialRollupClientWithTimeout(cctx, p.networkTimeout, p.log, ep.rollupUrl) + rollupClient, err := DialRollupClientWithTimeout(cctx, p.networkTimeout, p.log, p.rollupEndpoints[idx]) if err != nil { return fmt.Errorf("dialing rollup client: %w", err) } @@ -167,14 +91,12 @@ func (p *ActiveL2EndpointProvider) dialNextSequencer(ctx context.Context, idx in } func (p *ActiveL2EndpointProvider) NumEndpoints() int { - return len(p.endpoints) + return len(p.ethEndpoints) } func (p *ActiveL2EndpointProvider) Close() { if p.currentEthClient != nil { p.currentEthClient.Close() } - if p.currentRollupClient != nil { - p.currentRollupClient.Close() - } + p.ActiveL2RollupProvider.Close() } diff --git a/op-service/dial/active_rollup_provider.go b/op-service/dial/active_rollup_provider.go new file mode 100644 index 000000000000..358aa7267f66 --- /dev/null +++ b/op-service/dial/active_rollup_provider.go @@ -0,0 +1,133 @@ +package dial + +import ( + "context" + "errors" + "fmt" + "sync" + "time" + + "github.com/ethereum-optimism/optimism/op-service/sources" + "github.com/ethereum/go-ethereum/log" +) + +type ActiveL2RollupProvider struct { + rollupEndpoints []string + checkDuration time.Duration + networkTimeout time.Duration + log log.Logger + + activeTimeout time.Time + + currentRollupClient *sources.RollupClient + clientLock *sync.Mutex +} + +func NewActiveL2RollupProvider( + rollupUrls []string, + checkDuration time.Duration, + networkTimeout time.Duration, + logger log.Logger, +) (*ActiveL2RollupProvider, error) { + if len(rollupUrls) == 0 { + return nil, errors.New("empty rollup urls list") + } + + return &ActiveL2RollupProvider{ + rollupEndpoints: rollupUrls, + checkDuration: checkDuration, + networkTimeout: networkTimeout, + log: logger, + }, nil +} + +func (p *ActiveL2RollupProvider) RollupClient(ctx context.Context) (*sources.RollupClient, error) { + err := p.ensureActiveEndpoint(ctx) + if err != nil { + return nil, err + } + p.clientLock.Lock() + defer p.clientLock.Unlock() + return p.currentRollupClient, nil +} + +func (p *ActiveL2RollupProvider) ensureActiveEndpoint(ctx context.Context) error { + if !p.shouldCheck() { + return nil + } + + if err := p.findActiveEndpoints(ctx); err != nil { + return err + } + p.activeTimeout = time.Now().Add(p.checkDuration) + return nil +} + +func (p *ActiveL2RollupProvider) shouldCheck() bool { + return time.Now().After(p.activeTimeout) +} + +func (p *ActiveL2RollupProvider) findActiveEndpoints(ctx context.Context) error { + // If current is not active, dial new sequencers until finding an active one. + ts := time.Now() + for i := 0; ; i++ { + active, err := p.checkCurrentSequencer(ctx) + if err != nil { + if ctx.Err() != nil { + p.log.Warn("Error querying active sequencer, trying next.", "err", err, "try", i) + return fmt.Errorf("querying active sequencer: %w", err) + } + p.log.Warn("Error querying active sequencer, trying next.", "err", err, "try", i) + } else if active { + p.log.Debug("Current sequencer active.", "try", i) + return nil + } else { + p.log.Info("Current sequencer inactive, trying next.", "try", i) + } + + // After iterating over all endpoints, sleep if all were just inactive, + // to avoid spamming the sequencers in a loop. + if (i+1)%p.NumEndpoints() == 0 { + d := ts.Add(p.checkDuration).Sub(time.Now()) + time.Sleep(d) // accepts negative + ts = time.Now() + } + + if err := p.dialNextSequencer(ctx, i); err != nil { + return fmt.Errorf("dialing next sequencer: %w", err) + } + } +} + +func (p *ActiveL2RollupProvider) checkCurrentSequencer(ctx context.Context) (bool, error) { + cctx, cancel := context.WithTimeout(ctx, p.networkTimeout) + defer cancel() + p.clientLock.Lock() + defer p.clientLock.Unlock() + return p.currentRollupClient.SequencerActive(cctx) +} + +func (p *ActiveL2RollupProvider) dialNextSequencer(ctx context.Context, idx int) error { + cctx, cancel := context.WithTimeout(ctx, p.networkTimeout) + defer cancel() + ep := p.rollupEndpoints[idx] + + rollupClient, err := DialRollupClientWithTimeout(cctx, p.networkTimeout, p.log, ep) + if err != nil { + return fmt.Errorf("dialing rollup client: %w", err) + } + p.clientLock.Lock() + defer p.clientLock.Unlock() + p.currentRollupClient = rollupClient + return nil +} + +func (p *ActiveL2RollupProvider) NumEndpoints() int { + return len(p.rollupEndpoints) +} + +func (p *ActiveL2RollupProvider) Close() { + if p.currentRollupClient != nil { + p.currentRollupClient.Close() + } +} From 997aae6717484fe2d2491c2ee8817f0acc218361 Mon Sep 17 00:00:00 2001 From: EvanJRichard Date: Tue, 12 Dec 2023 15:05:14 -0500 Subject: [PATCH 04/41] Re-duplicate some code until tests are passing. --- op-service/dial/active_l2_provider.go | 48 +++++++++++++++++++++++++-- 1 file changed, 45 insertions(+), 3 deletions(-) diff --git a/op-service/dial/active_l2_provider.go b/op-service/dial/active_l2_provider.go index e202a259db2f..7592e4a76381 100644 --- a/op-service/dial/active_l2_provider.go +++ b/op-service/dial/active_l2_provider.go @@ -52,11 +52,25 @@ func (p *ActiveL2EndpointProvider) EthClient(ctx context.Context) (*ethclient.Cl } func (p *ActiveL2EndpointProvider) RollupClient(ctx context.Context) (*sources.RollupClient, error) { - return p.ActiveL2RollupProvider.RollupClient(ctx) + err := p.ensureActiveEndpoint(ctx) + if err != nil { + return nil, err + } + p.clientLock.Lock() + defer p.clientLock.Unlock() + return p.currentRollupClient, nil } func (p *ActiveL2EndpointProvider) ensureActiveEndpoint(ctx context.Context) error { - return p.ActiveL2RollupProvider.ensureActiveEndpoint(ctx) + if !p.shouldCheck() { + return nil + } + + if err := p.findActiveEndpoints(ctx); err != nil { + return err + } + p.activeTimeout = time.Now().Add(p.checkDuration) + return nil } func (p *ActiveL2EndpointProvider) shouldCheck() bool { @@ -64,7 +78,35 @@ func (p *ActiveL2EndpointProvider) shouldCheck() bool { } func (p *ActiveL2EndpointProvider) findActiveEndpoints(ctx context.Context) error { - return p.ActiveL2RollupProvider.findActiveEndpoints(ctx) + // If current is not active, dial new sequencers until finding an active one. + ts := time.Now() + for i := 0; ; i++ { + active, err := p.checkCurrentSequencer(ctx) + if err != nil { + if ctx.Err() != nil { + p.log.Warn("Error querying active sequencer, trying next.", "err", err, "try", i) + return fmt.Errorf("querying active sequencer: %w", err) + } + p.log.Warn("Error querying active sequencer, trying next.", "err", err, "try", i) + } else if active { + p.log.Debug("Current sequencer active.", "try", i) + return nil + } else { + p.log.Info("Current sequencer inactive, trying next.", "try", i) + } + + // After iterating over all endpoints, sleep if all were just inactive, + // to avoid spamming the sequencers in a loop. + if (i+1)%p.NumEndpoints() == 0 { + d := ts.Add(p.checkDuration).Sub(time.Now()) + time.Sleep(d) // accepts negative + ts = time.Now() + } + + if err := p.dialNextSequencer(ctx, i); err != nil { + return fmt.Errorf("dialing next sequencer: %w", err) + } + } } func (p *ActiveL2EndpointProvider) checkCurrentSequencer(ctx context.Context) (bool, error) { From 6f411335306e8ce3149c259be07815a5c4310be2 Mon Sep 17 00:00:00 2001 From: EvanJRichard Date: Tue, 12 Dec 2023 15:20:59 -0500 Subject: [PATCH 05/41] op-proposer: Add ability to enable active provider. --- op-proposer/flags/flags.go | 2 +- op-proposer/proposer/config.go | 2 +- op-proposer/proposer/service.go | 9 ++++++++- op-service/dial/active_l2_provider.go | 4 +++- op-service/dial/active_rollup_provider.go | 2 +- 5 files changed, 14 insertions(+), 5 deletions(-) diff --git a/op-proposer/flags/flags.go b/op-proposer/flags/flags.go index d9acde95dcd4..2ac346d708ba 100644 --- a/op-proposer/flags/flags.go +++ b/op-proposer/flags/flags.go @@ -29,7 +29,7 @@ var ( } RollupRpcFlag = &cli.StringFlag{ Name: "rollup-rpc", - Usage: "HTTP provider URL for the rollup node", + Usage: "HTTP provider URL for the rollup node. A comma-separated list enables the active rollup provider.", EnvVars: prefixEnvVars("ROLLUP_RPC"), } L2OOAddressFlag = &cli.StringFlag{ diff --git a/op-proposer/proposer/config.go b/op-proposer/proposer/config.go index c4a53a8aab76..dcc493f4e7f5 100644 --- a/op-proposer/proposer/config.go +++ b/op-proposer/proposer/config.go @@ -22,7 +22,7 @@ type CLIConfig struct { // L1EthRpc is the HTTP provider URL for L1. L1EthRpc string - // RollupRpc is the HTTP provider URL for the rollup node. + // RollupRpc is the HTTP provider URL for the rollup node. A comma-separated list enables the active rollup provider. RollupRpc string // L2OOAddress is the L2OutputOracle contract address. diff --git a/op-proposer/proposer/service.go b/op-proposer/proposer/service.go index c60f0bb07d54..77d38849d599 100644 --- a/op-proposer/proposer/service.go +++ b/op-proposer/proposer/service.go @@ -7,6 +7,7 @@ import ( "io" "net" "strconv" + "strings" "sync/atomic" "time" @@ -121,7 +122,13 @@ func (ps *ProposerService) initRPCClients(ctx context.Context, cfg *CLIConfig) e } ps.L1Client = l1Client - rollupProvider, err := dial.NewStaticL2RollupProvider(ctx, ps.Log, cfg.RollupRpc) + var rollupProvider dial.RollupProvider + if strings.Contains(cfg.RollupRpc, ",") { + rollupUrls := strings.Split(cfg.RollupRpc, ",") + rollupProvider, err = dial.NewActiveL2RollupProvider(rollupUrls, dial.DefaultActiveSequencerFollowerCheckDuration, dial.DefaultDialTimeout, ps.Log) + } else { + rollupProvider, err = dial.NewStaticL2RollupProvider(ctx, ps.Log, cfg.RollupRpc) + } if err != nil { return fmt.Errorf("failed to build L2 endpoint provider: %w", err) } diff --git a/op-service/dial/active_l2_provider.go b/op-service/dial/active_l2_provider.go index 7592e4a76381..1fad33ffd08a 100644 --- a/op-service/dial/active_l2_provider.go +++ b/op-service/dial/active_l2_provider.go @@ -11,6 +11,8 @@ import ( "github.com/ethereum/go-ethereum/log" ) +const DefaultActiveSequencerFollowerCheckDuration = 2 * DefaultDialTimeout + type ActiveL2EndpointProvider struct { ActiveL2RollupProvider ethEndpoints []string @@ -98,7 +100,7 @@ func (p *ActiveL2EndpointProvider) findActiveEndpoints(ctx context.Context) erro // After iterating over all endpoints, sleep if all were just inactive, // to avoid spamming the sequencers in a loop. if (i+1)%p.NumEndpoints() == 0 { - d := ts.Add(p.checkDuration).Sub(time.Now()) + d := time.Until(ts.Add(p.checkDuration)) time.Sleep(d) // accepts negative ts = time.Now() } diff --git a/op-service/dial/active_rollup_provider.go b/op-service/dial/active_rollup_provider.go index 358aa7267f66..dc859a98aa61 100644 --- a/op-service/dial/active_rollup_provider.go +++ b/op-service/dial/active_rollup_provider.go @@ -88,7 +88,7 @@ func (p *ActiveL2RollupProvider) findActiveEndpoints(ctx context.Context) error // After iterating over all endpoints, sleep if all were just inactive, // to avoid spamming the sequencers in a loop. if (i+1)%p.NumEndpoints() == 0 { - d := ts.Add(p.checkDuration).Sub(time.Now()) + d := time.Until(ts.Add(p.checkDuration)) time.Sleep(d) // accepts negative ts = time.Now() } From 3f12c5717cbc531a587cd4a3bed2709b3266d6db Mon Sep 17 00:00:00 2001 From: EvanJRichard Date: Tue, 12 Dec 2023 15:26:17 -0500 Subject: [PATCH 06/41] op-batcher: Add ability to enable active provider. --- op-batcher/batcher/config.go | 4 ++-- op-batcher/batcher/service.go | 12 ++++++++++-- op-batcher/flags/flags.go | 4 ++-- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/op-batcher/batcher/config.go b/op-batcher/batcher/config.go index 927446f4137b..036d164758fc 100644 --- a/op-batcher/batcher/config.go +++ b/op-batcher/batcher/config.go @@ -20,10 +20,10 @@ type CLIConfig struct { // L1EthRpc is the HTTP provider URL for L1. L1EthRpc string - // L2EthRpc is the HTTP provider URL for the L2 execution engine. + // L2EthRpc is the HTTP provider URL for the L2 execution engine. A comma-separated list enables the active L2 provider. Such a list needs to match the number of RollupRpcs provided. L2EthRpc string - // RollupRpc is the HTTP provider URL for the L2 rollup node. + // RollupRpc is the HTTP provider URL for the L2 rollup node. A comma-separated list enables the active L2 provider. Such a list needs to match the number of L2EthRpcs provided. RollupRpc string // MaxChannelDuration is the maximum duration (in #L1-blocks) to keep a diff --git a/op-batcher/batcher/service.go b/op-batcher/batcher/service.go index 9a55464e621d..c8c41935a695 100644 --- a/op-batcher/batcher/service.go +++ b/op-batcher/batcher/service.go @@ -8,6 +8,7 @@ import ( "net" _ "net/http/pprof" "strconv" + "strings" "sync/atomic" "time" @@ -125,9 +126,16 @@ func (bs *BatcherService) initRPCClients(ctx context.Context, cfg *CLIConfig) er } bs.L1Client = l1Client - endpointProvider, err := dial.NewStaticL2EndpointProvider(ctx, bs.Log, cfg.L2EthRpc, cfg.RollupRpc) + var endpointProvider dial.L2EndpointProvider + if strings.Contains(cfg.RollupRpc, ",") { + rollupUrls := strings.Split(cfg.RollupRpc, ",") + ethUrls := strings.Split(cfg.L2EthRpc, ",") + endpointProvider, err = dial.NewActiveL2EndpointProvider(ethUrls, rollupUrls, dial.DefaultActiveSequencerFollowerCheckDuration, dial.DefaultDialTimeout, bs.Log) + } else { + endpointProvider, err = dial.NewStaticL2EndpointProvider(ctx, bs.Log, cfg.L2EthRpc, cfg.RollupRpc) + } if err != nil { - return fmt.Errorf("failed to create L2 endpoint provider: %w", err) + return fmt.Errorf("failed to build L2 endpoint provider: %w", err) } bs.EndpointProvider = endpointProvider diff --git a/op-batcher/flags/flags.go b/op-batcher/flags/flags.go index 5e752d9d2748..72a1bb1567f0 100644 --- a/op-batcher/flags/flags.go +++ b/op-batcher/flags/flags.go @@ -30,12 +30,12 @@ var ( } L2EthRpcFlag = &cli.StringFlag{ Name: "l2-eth-rpc", - Usage: "HTTP provider URL for L2 execution engine", + Usage: "HTTP provider URL for L2 execution engine. A comma-separated list enables the active L2 endpoint provider. Such a list needs to match the number of rollup-rpcs provided.", EnvVars: prefixEnvVars("L2_ETH_RPC"), } RollupRpcFlag = &cli.StringFlag{ Name: "rollup-rpc", - Usage: "HTTP provider URL for Rollup node", + Usage: "HTTP provider URL for Rollup node. A comma-separated list enables the active L2 endpoint provider. Such a list needs to match the number of l2-eth-rpcs provided.", EnvVars: prefixEnvVars("ROLLUP_RPC"), } // Optional flags From 05b06f23f9b219463cedb070af136e1982c88d6f Mon Sep 17 00:00:00 2001 From: EvanJRichard Date: Tue, 12 Dec 2023 16:14:31 -0500 Subject: [PATCH 07/41] Add an empty test skeleton. --- op-service/dial/active_l2_provider_test.go | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 op-service/dial/active_l2_provider_test.go diff --git a/op-service/dial/active_l2_provider_test.go b/op-service/dial/active_l2_provider_test.go new file mode 100644 index 000000000000..95eeefa4db07 --- /dev/null +++ b/op-service/dial/active_l2_provider_test.go @@ -0,0 +1,16 @@ +package dial + +import ( + "testing" +) + +// TestActiveSequencerFailoverBehavior tests the behavior of the ActiveSequencerProvider when the active sequencer fails +func TestActiveSequencerFailoverBehavior(t *testing.T) { + // set up a few mock ethclients + // set up a few mock rollup clients + // make the first mock rollup client return Active, then Inactive + // make the second mock rollup client return Inactive, then Active + // make ActiveL2EndpointProvider, probably manually + // ask it for a rollup client, should get the first one + // ask it for a rollup client, should get the second one +} From 113a688738ed3b21ebef05bad78edbbb0e4a5f23 Mon Sep 17 00:00:00 2001 From: EvanJRichard Date: Tue, 12 Dec 2023 16:49:22 -0500 Subject: [PATCH 08/41] Add an empty test skeleton. --- op-batcher/batcher/service.go | 2 +- op-service/dial/active_l2_provider.go | 12 ------------ op-service/dial/active_l2_provider_test.go | 16 ++++++++++++++++ 3 files changed, 17 insertions(+), 13 deletions(-) create mode 100644 op-service/dial/active_l2_provider_test.go diff --git a/op-batcher/batcher/service.go b/op-batcher/batcher/service.go index c8c41935a695..ff49e3b0061a 100644 --- a/op-batcher/batcher/service.go +++ b/op-batcher/batcher/service.go @@ -127,7 +127,7 @@ func (bs *BatcherService) initRPCClients(ctx context.Context, cfg *CLIConfig) er bs.L1Client = l1Client var endpointProvider dial.L2EndpointProvider - if strings.Contains(cfg.RollupRpc, ",") { + if strings.Contains(cfg.RollupRpc, ",") || strings.Contains(cfg.L2EthRpc, ",") { rollupUrls := strings.Split(cfg.RollupRpc, ",") ethUrls := strings.Split(cfg.L2EthRpc, ",") endpointProvider, err = dial.NewActiveL2EndpointProvider(ethUrls, rollupUrls, dial.DefaultActiveSequencerFollowerCheckDuration, dial.DefaultDialTimeout, bs.Log) diff --git a/op-service/dial/active_l2_provider.go b/op-service/dial/active_l2_provider.go index 1fad33ffd08a..30adabd03c04 100644 --- a/op-service/dial/active_l2_provider.go +++ b/op-service/dial/active_l2_provider.go @@ -75,10 +75,6 @@ func (p *ActiveL2EndpointProvider) ensureActiveEndpoint(ctx context.Context) err return nil } -func (p *ActiveL2EndpointProvider) shouldCheck() bool { - return p.ActiveL2RollupProvider.shouldCheck() -} - func (p *ActiveL2EndpointProvider) findActiveEndpoints(ctx context.Context) error { // If current is not active, dial new sequencers until finding an active one. ts := time.Now() @@ -111,10 +107,6 @@ func (p *ActiveL2EndpointProvider) findActiveEndpoints(ctx context.Context) erro } } -func (p *ActiveL2EndpointProvider) checkCurrentSequencer(ctx context.Context) (bool, error) { - return p.ActiveL2RollupProvider.checkCurrentSequencer(ctx) -} - func (p *ActiveL2EndpointProvider) dialNextSequencer(ctx context.Context, idx int) error { cctx, cancel := context.WithTimeout(ctx, p.networkTimeout) defer cancel() @@ -134,10 +126,6 @@ func (p *ActiveL2EndpointProvider) dialNextSequencer(ctx context.Context, idx in return nil } -func (p *ActiveL2EndpointProvider) NumEndpoints() int { - return len(p.ethEndpoints) -} - func (p *ActiveL2EndpointProvider) Close() { if p.currentEthClient != nil { p.currentEthClient.Close() diff --git a/op-service/dial/active_l2_provider_test.go b/op-service/dial/active_l2_provider_test.go new file mode 100644 index 000000000000..95eeefa4db07 --- /dev/null +++ b/op-service/dial/active_l2_provider_test.go @@ -0,0 +1,16 @@ +package dial + +import ( + "testing" +) + +// TestActiveSequencerFailoverBehavior tests the behavior of the ActiveSequencerProvider when the active sequencer fails +func TestActiveSequencerFailoverBehavior(t *testing.T) { + // set up a few mock ethclients + // set up a few mock rollup clients + // make the first mock rollup client return Active, then Inactive + // make the second mock rollup client return Inactive, then Active + // make ActiveL2EndpointProvider, probably manually + // ask it for a rollup client, should get the first one + // ask it for a rollup client, should get the second one +} From 87a359db9ab315a5ad220e3fbe0f4b2a35b0fdaa Mon Sep 17 00:00:00 2001 From: EvanJRichard Date: Wed, 13 Dec 2023 11:54:17 -0500 Subject: [PATCH 09/41] op-service: add, but do not yet use, RollupClientInterface and EthClientInterface. --- op-batcher/batcher/service.go | 2 +- op-proposer/proposer/driver.go | 12 +++-- op-service/dial/active_l2_provider.go | 13 +---- op-service/dial/active_rollup_provider.go | 2 +- op-service/dial/client_interface.go | 57 ++++++++++++++++++++ op-service/dial/static_l2_provider.go | 4 +- op-service/dial/static_rollup_provider.go | 4 +- op-service/sources/rollupclient_interface.go | 20 +++++++ 8 files changed, 93 insertions(+), 21 deletions(-) create mode 100644 op-service/dial/client_interface.go create mode 100644 op-service/sources/rollupclient_interface.go diff --git a/op-batcher/batcher/service.go b/op-batcher/batcher/service.go index ff49e3b0061a..13647ca7adf9 100644 --- a/op-batcher/batcher/service.go +++ b/op-batcher/batcher/service.go @@ -127,7 +127,7 @@ func (bs *BatcherService) initRPCClients(ctx context.Context, cfg *CLIConfig) er bs.L1Client = l1Client var endpointProvider dial.L2EndpointProvider - if strings.Contains(cfg.RollupRpc, ",") || strings.Contains(cfg.L2EthRpc, ",") { + if strings.Contains(cfg.RollupRpc, ",") || strings.Contains(cfg.L2EthRpc, ",") { rollupUrls := strings.Split(cfg.RollupRpc, ",") ethUrls := strings.Split(cfg.L2EthRpc, ",") endpointProvider, err = dial.NewActiveL2EndpointProvider(ethUrls, rollupUrls, dial.DefaultActiveSequencerFollowerCheckDuration, dial.DefaultDialTimeout, bs.Log) diff --git a/op-proposer/proposer/driver.go b/op-proposer/proposer/driver.go index ac8eb2aba167..b15dff8096ee 100644 --- a/op-proposer/proposer/driver.go +++ b/op-proposer/proposer/driver.go @@ -20,6 +20,7 @@ import ( "github.com/ethereum-optimism/optimism/op-proposer/metrics" "github.com/ethereum-optimism/optimism/op-service/dial" "github.com/ethereum-optimism/optimism/op-service/eth" + "github.com/ethereum-optimism/optimism/op-service/sources" "github.com/ethereum-optimism/optimism/op-service/txmgr" ) @@ -199,14 +200,19 @@ func (l *L2OutputSubmitter) fetchOutput(ctx context.Context, block *big.Int) (*e ctx, cancel := context.WithTimeout(ctx, l.Cfg.NetworkTimeout) defer cancel() - rollupClient, err := l.RollupProvider.RollupClient(ctx) + rollupInterface, err := l.RollupProvider.RollupClient(ctx) if err != nil { - l.Log.Error("proposer unable to get rollup client", "err", err) + l.Log.Error("proposer unable to get rollup interface", "err", err) + return nil, false, err + } + rollupClient, ok := rollupInterface.(*sources.RollupClient) + if !ok { + l.Log.Error("proposer unable to get rollup client", "block", block, "err", err) return nil, false, err } output, err := rollupClient.OutputAtBlock(ctx, block.Uint64()) if err != nil { - l.Log.Error("failed to fetch output at block %d: %w", block, err) + l.Log.Error("failed to fetch output at block", "block", block, "err", err) return nil, false, err } if output.Version != supportedL2OutputVersion { diff --git a/op-service/dial/active_l2_provider.go b/op-service/dial/active_l2_provider.go index 30adabd03c04..21f0e861490f 100644 --- a/op-service/dial/active_l2_provider.go +++ b/op-service/dial/active_l2_provider.go @@ -6,7 +6,6 @@ import ( "fmt" "time" - "github.com/ethereum-optimism/optimism/op-service/sources" "github.com/ethereum/go-ethereum/ethclient" "github.com/ethereum/go-ethereum/log" ) @@ -43,7 +42,7 @@ func NewActiveL2EndpointProvider( }, nil } -func (p *ActiveL2EndpointProvider) EthClient(ctx context.Context) (*ethclient.Client, error) { +func (p *ActiveL2EndpointProvider) EthClient(ctx context.Context) (ClientInterface, error) { err := p.ensureActiveEndpoint(ctx) if err != nil { return nil, err @@ -53,16 +52,6 @@ func (p *ActiveL2EndpointProvider) EthClient(ctx context.Context) (*ethclient.Cl return p.currentEthClient, nil } -func (p *ActiveL2EndpointProvider) RollupClient(ctx context.Context) (*sources.RollupClient, error) { - err := p.ensureActiveEndpoint(ctx) - if err != nil { - return nil, err - } - p.clientLock.Lock() - defer p.clientLock.Unlock() - return p.currentRollupClient, nil -} - func (p *ActiveL2EndpointProvider) ensureActiveEndpoint(ctx context.Context) error { if !p.shouldCheck() { return nil diff --git a/op-service/dial/active_rollup_provider.go b/op-service/dial/active_rollup_provider.go index dc859a98aa61..a956da850b54 100644 --- a/op-service/dial/active_rollup_provider.go +++ b/op-service/dial/active_rollup_provider.go @@ -41,7 +41,7 @@ func NewActiveL2RollupProvider( }, nil } -func (p *ActiveL2RollupProvider) RollupClient(ctx context.Context) (*sources.RollupClient, error) { +func (p *ActiveL2RollupProvider) RollupClient(ctx context.Context) (sources.RollupClientInterface, error) { err := p.ensureActiveEndpoint(ctx) if err != nil { return nil, err diff --git a/op-service/dial/client_interface.go b/op-service/dial/client_interface.go new file mode 100644 index 000000000000..2ab9287be497 --- /dev/null +++ b/op-service/dial/client_interface.go @@ -0,0 +1,57 @@ +package dial + +import ( + "context" + "math/big" + + "github.com/ethereum/go-ethereum" + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/rpc" +) + +type ClientInterface interface { + // Blockchain Access + ChainID(ctx context.Context) (*big.Int, error) + BlockByHash(ctx context.Context, hash common.Hash) (*types.Block, error) + BlockByNumber(ctx context.Context, number *big.Int) (*types.Block, error) + BlockNumber(ctx context.Context) (uint64, error) + BlockReceipts(ctx context.Context, blockNrOrHash rpc.BlockNumberOrHash) ([]*types.Receipt, error) + HeaderByHash(ctx context.Context, hash common.Hash) (*types.Header, error) + HeaderByNumber(ctx context.Context, number *big.Int) (*types.Header, error) + TransactionByHash(ctx context.Context, hash common.Hash) (*types.Transaction, bool, error) + TransactionSender(ctx context.Context, tx *types.Transaction, block common.Hash, index uint) (common.Address, error) + TransactionCount(ctx context.Context, blockHash common.Hash) (uint, error) + TransactionInBlock(ctx context.Context, blockHash common.Hash, index uint) (*types.Transaction, error) + TransactionReceipt(ctx context.Context, txHash common.Hash) (*types.Receipt, error) + SyncProgress(ctx context.Context) (*ethereum.SyncProgress, error) + SubscribeNewHead(ctx context.Context, ch chan<- *types.Header) (ethereum.Subscription, error) + + // State Access + NetworkID(ctx context.Context) (*big.Int, error) + BalanceAt(ctx context.Context, account common.Address, blockNumber *big.Int) (*big.Int, error) + StorageAt(ctx context.Context, account common.Address, key common.Hash, blockNumber *big.Int) ([]byte, error) + CodeAt(ctx context.Context, account common.Address, blockNumber *big.Int) ([]byte, error) + NonceAt(ctx context.Context, account common.Address, blockNumber *big.Int) (uint64, error) + + // Filters + FilterLogs(ctx context.Context, q ethereum.FilterQuery) ([]types.Log, error) + SubscribeFilterLogs(ctx context.Context, q ethereum.FilterQuery, ch chan<- types.Log) (ethereum.Subscription, error) + + // Pending State + PendingBalanceAt(ctx context.Context, account common.Address) (*big.Int, error) + PendingStorageAt(ctx context.Context, account common.Address, key common.Hash) ([]byte, error) + PendingCodeAt(ctx context.Context, account common.Address) ([]byte, error) + PendingNonceAt(ctx context.Context, account common.Address) (uint64, error) + PendingTransactionCount(ctx context.Context) (uint, error) + + // Contract Calling + CallContract(ctx context.Context, msg ethereum.CallMsg, blockNumber *big.Int) ([]byte, error) + PendingCallContract(ctx context.Context, msg ethereum.CallMsg) ([]byte, error) + SuggestGasPrice(ctx context.Context) (*big.Int, error) + EstimateGas(ctx context.Context, msg ethereum.CallMsg) (uint64, error) + SendTransaction(ctx context.Context, tx *types.Transaction) error + + // Utility + Close() +} diff --git a/op-service/dial/static_l2_provider.go b/op-service/dial/static_l2_provider.go index 1f557c03ea60..f9a42b4c9d11 100644 --- a/op-service/dial/static_l2_provider.go +++ b/op-service/dial/static_l2_provider.go @@ -13,7 +13,7 @@ import ( type L2EndpointProvider interface { RollupProvider // EthClient(ctx) returns the underlying ethclient pointing to the L2 execution node - EthClient(ctx context.Context) (*ethclient.Client, error) + EthClient(ctx context.Context) (ClientInterface, error) } // StaticL2EndpointProvider is a L2EndpointProvider that always returns the same static RollupClient and eth client @@ -38,7 +38,7 @@ func NewStaticL2EndpointProvider(ctx context.Context, log log.Logger, ethClientU }, nil } -func (p *StaticL2EndpointProvider) EthClient(context.Context) (*ethclient.Client, error) { +func (p *StaticL2EndpointProvider) EthClient(context.Context) (ClientInterface, error) { return p.ethClient, nil } diff --git a/op-service/dial/static_rollup_provider.go b/op-service/dial/static_rollup_provider.go index 44852626d547..7c451861cb97 100644 --- a/op-service/dial/static_rollup_provider.go +++ b/op-service/dial/static_rollup_provider.go @@ -11,7 +11,7 @@ import ( // It manages the lifecycle of the RollupClient for callers type RollupProvider interface { // RollupClient(ctx) returns the underlying sources.RollupClient pointing to the L2 rollup consensus node - RollupClient(ctx context.Context) (*sources.RollupClient, error) + RollupClient(ctx context.Context) (sources.RollupClientInterface, error) // Close() closes the underlying client or clients Close() } @@ -39,7 +39,7 @@ func NewStaticL2RollupProviderFromExistingRollup(rollupCl *sources.RollupClient) }, nil } -func (p *StaticL2RollupProvider) RollupClient(context.Context) (*sources.RollupClient, error) { +func (p *StaticL2RollupProvider) RollupClient(context.Context) (sources.RollupClientInterface, error) { return p.rollupClient, nil } diff --git a/op-service/sources/rollupclient_interface.go b/op-service/sources/rollupclient_interface.go new file mode 100644 index 000000000000..dd192c60564b --- /dev/null +++ b/op-service/sources/rollupclient_interface.go @@ -0,0 +1,20 @@ +package sources + +import ( + "context" + + "github.com/ethereum-optimism/optimism/op-node/rollup" + "github.com/ethereum-optimism/optimism/op-service/eth" + "github.com/ethereum/go-ethereum/common" +) + +type RollupClientInterface interface { + OutputAtBlock(ctx context.Context, blockNum uint64) (*eth.OutputResponse, error) + SyncStatus(ctx context.Context) (*eth.SyncStatus, error) + RollupConfig(ctx context.Context) (*rollup.Config, error) + Version(ctx context.Context) (string, error) + StartSequencer(ctx context.Context, unsafeHead common.Hash) error + StopSequencer(ctx context.Context) (common.Hash, error) + SequencerActive(ctx context.Context) (bool, error) + Close() +} From 3a6e5e12cd79811c909cbbeed3a057188ff471a0 Mon Sep 17 00:00:00 2001 From: EvanJRichard Date: Wed, 13 Dec 2023 13:23:17 -0500 Subject: [PATCH 10/41] op-service: update mocks and interfaces for endpoint provider testing. --- op-service/dial/active_l2_provider.go | 3 +- op-service/dial/active_rollup_provider.go | 2 +- op-service/dial/client_interface.go | 45 +-------------- op-service/sources/rollupclient_interface.go | 5 +- op-service/testutils/mock_eth_client.go | 9 +++ op-service/testutils/mock_rollup_client.go | 58 ++++++++++++++++++++ 6 files changed, 73 insertions(+), 49 deletions(-) create mode 100644 op-service/testutils/mock_rollup_client.go diff --git a/op-service/dial/active_l2_provider.go b/op-service/dial/active_l2_provider.go index 21f0e861490f..cd225fed9d97 100644 --- a/op-service/dial/active_l2_provider.go +++ b/op-service/dial/active_l2_provider.go @@ -6,7 +6,6 @@ import ( "fmt" "time" - "github.com/ethereum/go-ethereum/ethclient" "github.com/ethereum/go-ethereum/log" ) @@ -15,7 +14,7 @@ const DefaultActiveSequencerFollowerCheckDuration = 2 * DefaultDialTimeout type ActiveL2EndpointProvider struct { ActiveL2RollupProvider ethEndpoints []string - currentEthClient *ethclient.Client + currentEthClient ClientInterface } func NewActiveL2EndpointProvider( diff --git a/op-service/dial/active_rollup_provider.go b/op-service/dial/active_rollup_provider.go index a956da850b54..23c492822c5f 100644 --- a/op-service/dial/active_rollup_provider.go +++ b/op-service/dial/active_rollup_provider.go @@ -19,7 +19,7 @@ type ActiveL2RollupProvider struct { activeTimeout time.Time - currentRollupClient *sources.RollupClient + currentRollupClient sources.RollupClientInterface clientLock *sync.Mutex } diff --git a/op-service/dial/client_interface.go b/op-service/dial/client_interface.go index 2ab9287be497..1fae596e0d4c 100644 --- a/op-service/dial/client_interface.go +++ b/op-service/dial/client_interface.go @@ -4,54 +4,13 @@ import ( "context" "math/big" - "github.com/ethereum/go-ethereum" - "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/types" - "github.com/ethereum/go-ethereum/rpc" ) +// ClientInterface is an interface for providing an ethclient.Client +// It does not describe all of the functions an ethclient.Client has, only the ones used by callers of the L2 Providers type ClientInterface interface { - // Blockchain Access - ChainID(ctx context.Context) (*big.Int, error) - BlockByHash(ctx context.Context, hash common.Hash) (*types.Block, error) BlockByNumber(ctx context.Context, number *big.Int) (*types.Block, error) - BlockNumber(ctx context.Context) (uint64, error) - BlockReceipts(ctx context.Context, blockNrOrHash rpc.BlockNumberOrHash) ([]*types.Receipt, error) - HeaderByHash(ctx context.Context, hash common.Hash) (*types.Header, error) - HeaderByNumber(ctx context.Context, number *big.Int) (*types.Header, error) - TransactionByHash(ctx context.Context, hash common.Hash) (*types.Transaction, bool, error) - TransactionSender(ctx context.Context, tx *types.Transaction, block common.Hash, index uint) (common.Address, error) - TransactionCount(ctx context.Context, blockHash common.Hash) (uint, error) - TransactionInBlock(ctx context.Context, blockHash common.Hash, index uint) (*types.Transaction, error) - TransactionReceipt(ctx context.Context, txHash common.Hash) (*types.Receipt, error) - SyncProgress(ctx context.Context) (*ethereum.SyncProgress, error) - SubscribeNewHead(ctx context.Context, ch chan<- *types.Header) (ethereum.Subscription, error) - // State Access - NetworkID(ctx context.Context) (*big.Int, error) - BalanceAt(ctx context.Context, account common.Address, blockNumber *big.Int) (*big.Int, error) - StorageAt(ctx context.Context, account common.Address, key common.Hash, blockNumber *big.Int) ([]byte, error) - CodeAt(ctx context.Context, account common.Address, blockNumber *big.Int) ([]byte, error) - NonceAt(ctx context.Context, account common.Address, blockNumber *big.Int) (uint64, error) - - // Filters - FilterLogs(ctx context.Context, q ethereum.FilterQuery) ([]types.Log, error) - SubscribeFilterLogs(ctx context.Context, q ethereum.FilterQuery, ch chan<- types.Log) (ethereum.Subscription, error) - - // Pending State - PendingBalanceAt(ctx context.Context, account common.Address) (*big.Int, error) - PendingStorageAt(ctx context.Context, account common.Address, key common.Hash) ([]byte, error) - PendingCodeAt(ctx context.Context, account common.Address) ([]byte, error) - PendingNonceAt(ctx context.Context, account common.Address) (uint64, error) - PendingTransactionCount(ctx context.Context) (uint, error) - - // Contract Calling - CallContract(ctx context.Context, msg ethereum.CallMsg, blockNumber *big.Int) ([]byte, error) - PendingCallContract(ctx context.Context, msg ethereum.CallMsg) ([]byte, error) - SuggestGasPrice(ctx context.Context) (*big.Int, error) - EstimateGas(ctx context.Context, msg ethereum.CallMsg) (uint64, error) - SendTransaction(ctx context.Context, tx *types.Transaction) error - - // Utility Close() } diff --git a/op-service/sources/rollupclient_interface.go b/op-service/sources/rollupclient_interface.go index dd192c60564b..43541f4fea3b 100644 --- a/op-service/sources/rollupclient_interface.go +++ b/op-service/sources/rollupclient_interface.go @@ -8,13 +8,12 @@ import ( "github.com/ethereum/go-ethereum/common" ) +// RollupClientInterface is an interface for providing a RollupClient +// It does not describe all of the functions a RollupClient has, only the ones used by the L2 Providers and their callers type RollupClientInterface interface { - OutputAtBlock(ctx context.Context, blockNum uint64) (*eth.OutputResponse, error) SyncStatus(ctx context.Context) (*eth.SyncStatus, error) RollupConfig(ctx context.Context) (*rollup.Config, error) - Version(ctx context.Context) (string, error) StartSequencer(ctx context.Context, unsafeHead common.Hash) error - StopSequencer(ctx context.Context) (common.Hash, error) SequencerActive(ctx context.Context) (bool, error) Close() } diff --git a/op-service/testutils/mock_eth_client.go b/op-service/testutils/mock_eth_client.go index aa96da3ddeca..4447c8871fad 100644 --- a/op-service/testutils/mock_eth_client.go +++ b/op-service/testutils/mock_eth_client.go @@ -2,6 +2,7 @@ package testutils import ( "context" + "math/big" "github.com/stretchr/testify/mock" @@ -128,3 +129,11 @@ func (m *MockEthClient) ReadStorageAt(ctx context.Context, address common.Addres func (m *MockEthClient) ExpectReadStorageAt(ctx context.Context, address common.Address, storageSlot common.Hash, blockHash common.Hash, result common.Hash, err error) { m.Mock.On("ReadStorageAt", address, storageSlot, blockHash).Once().Return(result, &err) } + +func (m *MockEthClient) BlockByNumber(ctx context.Context, number *big.Int) (*types.Block, error) { + return m.Mock.MethodCalled(("BlockByNumber"), number).Get(0).(*types.Block), nil +} + +func (m *MockEthClient) ExpectBlockByNumber(number *big.Int, block *types.Block, err error) { + m.Mock.On("BlockByNumber", number).Once().Return(block, &err) +} diff --git a/op-service/testutils/mock_rollup_client.go b/op-service/testutils/mock_rollup_client.go new file mode 100644 index 000000000000..dca6bcadc159 --- /dev/null +++ b/op-service/testutils/mock_rollup_client.go @@ -0,0 +1,58 @@ +package testutils + +import ( + "context" + + "github.com/ethereum-optimism/optimism/op-node/rollup" + "github.com/ethereum-optimism/optimism/op-service/eth" + "github.com/ethereum/go-ethereum/common" + "github.com/stretchr/testify/mock" +) + +type MockRollupClient struct { + mock.Mock +} + +func (m *MockRollupClient) SyncStatus(ctx context.Context) (*eth.SyncStatus, error) { + out := m.Mock.MethodCalled("SyncStatus") + return out[0].(*eth.SyncStatus), *out[1].(*error) +} + +func (m *MockRollupClient) ExpectSyncStatus(status *eth.SyncStatus, err error) { + m.Mock.On("SyncStatus").Once().Return(status, &err) +} + +func (m *MockRollupClient) RollupConfig(ctx context.Context) (*rollup.Config, error) { + out := m.Mock.MethodCalled("RollupConfig") + return out[0].(*rollup.Config), *out[1].(*error) +} + +func (m *MockRollupClient) ExpectRollupConfig(config *rollup.Config, err error) { + m.Mock.On("RollupConfig").Once().Return(config, &err) +} + +func (m *MockRollupClient) StartSequencer(ctx context.Context, unsafeHead common.Hash) error { + out := m.Mock.MethodCalled("StartSequencer", unsafeHead) + return *out[0].(*error) +} + +func (m *MockRollupClient) ExpectStartSequencer(unsafeHead common.Hash, err error) { + m.Mock.On("StartSequencer", unsafeHead).Once().Return(&err) +} + +func (m *MockRollupClient) SequencerActive(ctx context.Context) (bool, error) { + out := m.Mock.MethodCalled("SequencerActive") + return out[0].(bool), *out[1].(*error) +} + +func (m *MockRollupClient) ExpectSequencerActive(active bool, err error) { + m.Mock.On("SequencerActive").Once().Return(active, &err) +} + +func (m *MockRollupClient) ExpectClose() { + m.Mock.On("Close").Once() +} + +func (m *MockRollupClient) Close() { + m.Mock.MethodCalled("Close") +} From 52411326bd020c49fe6dfd1c730ba51daef6c460 Mon Sep 17 00:00:00 2001 From: EvanJRichard Date: Wed, 13 Dec 2023 15:01:49 -0500 Subject: [PATCH 11/41] op-service - WIP on Active L2 Providers: unit tests pass, design and impl contains TODOs. --- op-batcher/batcher/service.go | 2 +- op-proposer/proposer/service.go | 2 +- op-service/dial/active_l2_provider.go | 90 ++++------------- op-service/dial/active_l2_provider_test.go | 81 ++++++++++++++-- op-service/dial/active_rollup_provider.go | 96 +++++++++---------- ...nt_interface.go => ethclient_interface.go} | 4 +- .../rollupclient_interface.go | 2 +- op-service/dial/static_l2_provider.go | 4 +- op-service/dial/static_rollup_provider.go | 4 +- op-service/testutils/mock_eth_client.go | 8 ++ 10 files changed, 153 insertions(+), 140 deletions(-) rename op-service/dial/{client_interface.go => ethclient_interface.go} (73%) rename op-service/{sources => dial}/rollupclient_interface.go (97%) diff --git a/op-batcher/batcher/service.go b/op-batcher/batcher/service.go index 13647ca7adf9..76727023aad2 100644 --- a/op-batcher/batcher/service.go +++ b/op-batcher/batcher/service.go @@ -130,7 +130,7 @@ func (bs *BatcherService) initRPCClients(ctx context.Context, cfg *CLIConfig) er if strings.Contains(cfg.RollupRpc, ",") || strings.Contains(cfg.L2EthRpc, ",") { rollupUrls := strings.Split(cfg.RollupRpc, ",") ethUrls := strings.Split(cfg.L2EthRpc, ",") - endpointProvider, err = dial.NewActiveL2EndpointProvider(ethUrls, rollupUrls, dial.DefaultActiveSequencerFollowerCheckDuration, dial.DefaultDialTimeout, bs.Log) + endpointProvider, err = dial.NewActiveL2EndpointProvider(ctx, ethUrls, rollupUrls, dial.DefaultActiveSequencerFollowerCheckDuration, dial.DefaultDialTimeout, bs.Log) } else { endpointProvider, err = dial.NewStaticL2EndpointProvider(ctx, bs.Log, cfg.L2EthRpc, cfg.RollupRpc) } diff --git a/op-proposer/proposer/service.go b/op-proposer/proposer/service.go index 77d38849d599..20cd186af734 100644 --- a/op-proposer/proposer/service.go +++ b/op-proposer/proposer/service.go @@ -125,7 +125,7 @@ func (ps *ProposerService) initRPCClients(ctx context.Context, cfg *CLIConfig) e var rollupProvider dial.RollupProvider if strings.Contains(cfg.RollupRpc, ",") { rollupUrls := strings.Split(cfg.RollupRpc, ",") - rollupProvider, err = dial.NewActiveL2RollupProvider(rollupUrls, dial.DefaultActiveSequencerFollowerCheckDuration, dial.DefaultDialTimeout, ps.Log) + rollupProvider, err = dial.NewActiveL2RollupProvider(ctx, rollupUrls, dial.DefaultActiveSequencerFollowerCheckDuration, dial.DefaultDialTimeout, ps.Log) } else { rollupProvider, err = dial.NewStaticL2RollupProvider(ctx, ps.Log, cfg.RollupRpc) } diff --git a/op-service/dial/active_l2_provider.go b/op-service/dial/active_l2_provider.go index cd225fed9d97..66504a8ac3d9 100644 --- a/op-service/dial/active_l2_provider.go +++ b/op-service/dial/active_l2_provider.go @@ -13,11 +13,11 @@ const DefaultActiveSequencerFollowerCheckDuration = 2 * DefaultDialTimeout type ActiveL2EndpointProvider struct { ActiveL2RollupProvider - ethEndpoints []string - currentEthClient ClientInterface + ethClients []EthClientInterface } func NewActiveL2EndpointProvider( + ctx context.Context, ethUrls, rollupUrls []string, checkDuration time.Duration, networkTimeout time.Duration, @@ -30,93 +30,41 @@ func NewActiveL2EndpointProvider( return nil, errors.New("number of eth urls and rollup urls mismatch") } - rollupProvider, err := NewActiveL2RollupProvider(rollupUrls, checkDuration, networkTimeout, logger) + rollupProvider, err := NewActiveL2RollupProvider(ctx, rollupUrls, checkDuration, networkTimeout, logger) if err != nil { return nil, err } + cctx, cancel := context.WithTimeout(ctx, networkTimeout) + defer cancel() + ethClients := make([]EthClientInterface, 0, len(ethUrls)) + for _, url := range ethUrls { + + ethClient, err := DialEthClientWithTimeout(cctx, networkTimeout, logger, url) + if err != nil { + return nil, fmt.Errorf("dialing eth client: %w", err) + } + ethClients = append(ethClients, ethClient) + } return &ActiveL2EndpointProvider{ ActiveL2RollupProvider: *rollupProvider, - ethEndpoints: ethUrls, + ethClients: ethClients, }, nil } -func (p *ActiveL2EndpointProvider) EthClient(ctx context.Context) (ClientInterface, error) { +func (p *ActiveL2EndpointProvider) EthClient(ctx context.Context) (EthClientInterface, error) { err := p.ensureActiveEndpoint(ctx) if err != nil { return nil, err } p.clientLock.Lock() defer p.clientLock.Unlock() - return p.currentEthClient, nil -} - -func (p *ActiveL2EndpointProvider) ensureActiveEndpoint(ctx context.Context) error { - if !p.shouldCheck() { - return nil - } - - if err := p.findActiveEndpoints(ctx); err != nil { - return err - } - p.activeTimeout = time.Now().Add(p.checkDuration) - return nil -} - -func (p *ActiveL2EndpointProvider) findActiveEndpoints(ctx context.Context) error { - // If current is not active, dial new sequencers until finding an active one. - ts := time.Now() - for i := 0; ; i++ { - active, err := p.checkCurrentSequencer(ctx) - if err != nil { - if ctx.Err() != nil { - p.log.Warn("Error querying active sequencer, trying next.", "err", err, "try", i) - return fmt.Errorf("querying active sequencer: %w", err) - } - p.log.Warn("Error querying active sequencer, trying next.", "err", err, "try", i) - } else if active { - p.log.Debug("Current sequencer active.", "try", i) - return nil - } else { - p.log.Info("Current sequencer inactive, trying next.", "try", i) - } - - // After iterating over all endpoints, sleep if all were just inactive, - // to avoid spamming the sequencers in a loop. - if (i+1)%p.NumEndpoints() == 0 { - d := time.Until(ts.Add(p.checkDuration)) - time.Sleep(d) // accepts negative - ts = time.Now() - } - - if err := p.dialNextSequencer(ctx, i); err != nil { - return fmt.Errorf("dialing next sequencer: %w", err) - } - } -} - -func (p *ActiveL2EndpointProvider) dialNextSequencer(ctx context.Context, idx int) error { - cctx, cancel := context.WithTimeout(ctx, p.networkTimeout) - defer cancel() - - ethClient, err := DialEthClientWithTimeout(cctx, p.networkTimeout, p.log, p.ethEndpoints[idx]) - if err != nil { - return fmt.Errorf("dialing eth client: %w", err) - } - - rollupClient, err := DialRollupClientWithTimeout(cctx, p.networkTimeout, p.log, p.rollupEndpoints[idx]) - if err != nil { - return fmt.Errorf("dialing rollup client: %w", err) - } - p.clientLock.Lock() - defer p.clientLock.Unlock() - p.currentEthClient, p.currentRollupClient = ethClient, rollupClient - return nil + return p.ethClients[p.currentIdx], nil } func (p *ActiveL2EndpointProvider) Close() { - if p.currentEthClient != nil { - p.currentEthClient.Close() + for _, ethClient := range p.ethClients { + ethClient.Close() } p.ActiveL2RollupProvider.Close() } diff --git a/op-service/dial/active_l2_provider_test.go b/op-service/dial/active_l2_provider_test.go index 95eeefa4db07..6889adaa3a93 100644 --- a/op-service/dial/active_l2_provider_test.go +++ b/op-service/dial/active_l2_provider_test.go @@ -1,16 +1,81 @@ package dial import ( + "context" + "sync" "testing" + "time" + + "github.com/ethereum-optimism/optimism/op-service/testlog" + "github.com/ethereum-optimism/optimism/op-service/testutils" + "github.com/ethereum/go-ethereum/log" + "github.com/stretchr/testify/require" ) +// do a test with just rollupclients +// then do a test with rollupclients and ethclients // TestActiveSequencerFailoverBehavior tests the behavior of the ActiveSequencerProvider when the active sequencer fails -func TestActiveSequencerFailoverBehavior(t *testing.T) { - // set up a few mock ethclients - // set up a few mock rollup clients - // make the first mock rollup client return Active, then Inactive - // make the second mock rollup client return Inactive, then Active - // make ActiveL2EndpointProvider, probably manually - // ask it for a rollup client, should get the first one - // ask it for a rollup client, should get the second one +func TestActiveSequencerFailoverBehavior_RollupProviders(t *testing.T) { + primarySequencer := testutils.MockRollupClient{} + primarySequencer.ExpectSequencerActive(true, nil) + primarySequencer.ExpectSequencerActive(false, nil) + secondarySequencer := testutils.MockRollupClient{} + secondarySequencer.ExpectSequencerActive(true, nil) + + rollupClients := []RollupClientInterface{} + rollupClients = append(rollupClients, &primarySequencer) + rollupClients = append(rollupClients, &secondarySequencer) + + endpointProvider := ActiveL2RollupProvider{ + rollupClients: rollupClients, + checkDuration: 1 * time.Duration(time.Microsecond), + networkTimeout: 1 * time.Duration(time.Second), + log: testlog.Logger(t, log.LvlDebug), + activeTimeout: time.Now(), + currentIdx: 0, + clientLock: &sync.Mutex{}, + } + _, err := endpointProvider.RollupClient(context.Background()) + require.NoError(t, err) + require.Equal(t, 0, endpointProvider.currentIdx) + _, err = endpointProvider.RollupClient(context.Background()) + require.NoError(t, err) + require.Equal(t, 1, endpointProvider.currentIdx) +} + +func TestActiveSequencerFailoverBehavior_L2Providers(t *testing.T) { + primarySequencer := testutils.MockRollupClient{} + primarySequencer.ExpectSequencerActive(true, nil) + primarySequencer.ExpectSequencerActive(false, nil) + secondarySequencer := testutils.MockRollupClient{} + secondarySequencer.ExpectSequencerActive(true, nil) + + rollupClients := []RollupClientInterface{} + rollupClients = append(rollupClients, &primarySequencer) + rollupClients = append(rollupClients, &secondarySequencer) + + rollupProvider := ActiveL2RollupProvider{ + rollupClients: rollupClients, + checkDuration: 1 * time.Duration(time.Microsecond), + networkTimeout: 1 * time.Duration(time.Second), + log: testlog.Logger(t, log.LvlDebug), + activeTimeout: time.Now(), + currentIdx: 0, + clientLock: &sync.Mutex{}, + } + ethClients := []EthClientInterface{} + primaryEthClient := testutils.MockEthClient{} + ethClients = append(ethClients, &primaryEthClient) + secondaryEthClient := testutils.MockEthClient{} + ethClients = append(ethClients, &secondaryEthClient) + endpointProvider := ActiveL2EndpointProvider{ + ActiveL2RollupProvider: rollupProvider, + ethClients: ethClients, + } + _, err := endpointProvider.EthClient(context.Background()) + require.NoError(t, err) + require.Equal(t, 0, endpointProvider.currentIdx) + _, err = endpointProvider.EthClient(context.Background()) + require.NoError(t, err) + require.Equal(t, 1, endpointProvider.currentIdx) } diff --git a/op-service/dial/active_rollup_provider.go b/op-service/dial/active_rollup_provider.go index 23c492822c5f..eeed24421610 100644 --- a/op-service/dial/active_rollup_provider.go +++ b/op-service/dial/active_rollup_provider.go @@ -7,23 +7,23 @@ import ( "sync" "time" - "github.com/ethereum-optimism/optimism/op-service/sources" "github.com/ethereum/go-ethereum/log" ) type ActiveL2RollupProvider struct { - rollupEndpoints []string - checkDuration time.Duration - networkTimeout time.Duration - log log.Logger + checkDuration time.Duration + networkTimeout time.Duration + log log.Logger activeTimeout time.Time - currentRollupClient sources.RollupClientInterface - clientLock *sync.Mutex + currentIdx int + rollupClients []RollupClientInterface + clientLock *sync.Mutex } func NewActiveL2RollupProvider( + ctx context.Context, rollupUrls []string, checkDuration time.Duration, networkTimeout time.Duration, @@ -33,22 +33,35 @@ func NewActiveL2RollupProvider( return nil, errors.New("empty rollup urls list") } + cctx, cancel := context.WithTimeout(ctx, networkTimeout) + defer cancel() + + rollupClients := make([]RollupClientInterface, 0, len(rollupUrls)) + for _, url := range rollupUrls { + rollupClient, err := DialRollupClientWithTimeout(cctx, networkTimeout, logger, url) + if err != nil { + return nil, fmt.Errorf("dialing rollup client: %w", err) + } + rollupClients = append(rollupClients, rollupClient) + } + return &ActiveL2RollupProvider{ - rollupEndpoints: rollupUrls, - checkDuration: checkDuration, - networkTimeout: networkTimeout, - log: logger, + checkDuration: checkDuration, + networkTimeout: networkTimeout, + log: logger, + rollupClients: rollupClients, + clientLock: &sync.Mutex{}, }, nil } -func (p *ActiveL2RollupProvider) RollupClient(ctx context.Context) (sources.RollupClientInterface, error) { +func (p *ActiveL2RollupProvider) RollupClient(ctx context.Context) (RollupClientInterface, error) { err := p.ensureActiveEndpoint(ctx) if err != nil { return nil, err } p.clientLock.Lock() defer p.clientLock.Unlock() - return p.currentRollupClient, nil + return p.rollupClients[p.currentIdx], nil } func (p *ActiveL2RollupProvider) ensureActiveEndpoint(ctx context.Context) error { @@ -68,66 +81,45 @@ func (p *ActiveL2RollupProvider) shouldCheck() bool { } func (p *ActiveL2RollupProvider) findActiveEndpoints(ctx context.Context) error { - // If current is not active, dial new sequencers until finding an active one. + const maxRetries = 10 ts := time.Now() - for i := 0; ; i++ { - active, err := p.checkCurrentSequencer(ctx) + for i := 0; i < maxRetries; i++ { + active, err := p.checkSequencer(ctx, i%p.numEndpoints()) if err != nil { + p.log.Warn("Error querying active sequencer", "err", err, "try", i) if ctx.Err() != nil { - p.log.Warn("Error querying active sequencer, trying next.", "err", err, "try", i) return fmt.Errorf("querying active sequencer: %w", err) } - p.log.Warn("Error querying active sequencer, trying next.", "err", err, "try", i) } else if active { - p.log.Debug("Current sequencer active.", "try", i) + p.log.Debug("Current sequencer active", "index", i) + p.currentIdx = i return nil } else { - p.log.Info("Current sequencer inactive, trying next.", "try", i) + p.log.Info("Current sequencer inactive", "index", i) } - // After iterating over all endpoints, sleep if all were just inactive, - // to avoid spamming the sequencers in a loop. - if (i+1)%p.NumEndpoints() == 0 { + if i%p.numEndpoints() == 0 { d := time.Until(ts.Add(p.checkDuration)) - time.Sleep(d) // accepts negative - ts = time.Now() - } - - if err := p.dialNextSequencer(ctx, i); err != nil { - return fmt.Errorf("dialing next sequencer: %w", err) + time.Sleep(d) // Accepts negative duration } } + return fmt.Errorf("failed to find an active sequencer after %d retries", maxRetries) } -func (p *ActiveL2RollupProvider) checkCurrentSequencer(ctx context.Context) (bool, error) { - cctx, cancel := context.WithTimeout(ctx, p.networkTimeout) - defer cancel() - p.clientLock.Lock() - defer p.clientLock.Unlock() - return p.currentRollupClient.SequencerActive(cctx) -} - -func (p *ActiveL2RollupProvider) dialNextSequencer(ctx context.Context, idx int) error { +func (p *ActiveL2RollupProvider) checkSequencer(ctx context.Context, idx int) (bool, error) { cctx, cancel := context.WithTimeout(ctx, p.networkTimeout) defer cancel() - ep := p.rollupEndpoints[idx] - - rollupClient, err := DialRollupClientWithTimeout(cctx, p.networkTimeout, p.log, ep) - if err != nil { - return fmt.Errorf("dialing rollup client: %w", err) - } - p.clientLock.Lock() - defer p.clientLock.Unlock() - p.currentRollupClient = rollupClient - return nil + active, err := p.rollupClients[idx].SequencerActive(cctx) + p.log.Info("Checked whether sequencer is active", "index", idx, "active", active, "err", err) + return active, err } -func (p *ActiveL2RollupProvider) NumEndpoints() int { - return len(p.rollupEndpoints) +func (p *ActiveL2RollupProvider) numEndpoints() int { + return len(p.rollupClients) } func (p *ActiveL2RollupProvider) Close() { - if p.currentRollupClient != nil { - p.currentRollupClient.Close() + for _, client := range p.rollupClients { + client.Close() } } diff --git a/op-service/dial/client_interface.go b/op-service/dial/ethclient_interface.go similarity index 73% rename from op-service/dial/client_interface.go rename to op-service/dial/ethclient_interface.go index 1fae596e0d4c..58a2070974ee 100644 --- a/op-service/dial/client_interface.go +++ b/op-service/dial/ethclient_interface.go @@ -7,9 +7,9 @@ import ( "github.com/ethereum/go-ethereum/core/types" ) -// ClientInterface is an interface for providing an ethclient.Client +// EthClientInterface is an interface for providing an ethclient.Client // It does not describe all of the functions an ethclient.Client has, only the ones used by callers of the L2 Providers -type ClientInterface interface { +type EthClientInterface interface { BlockByNumber(ctx context.Context, number *big.Int) (*types.Block, error) Close() diff --git a/op-service/sources/rollupclient_interface.go b/op-service/dial/rollupclient_interface.go similarity index 97% rename from op-service/sources/rollupclient_interface.go rename to op-service/dial/rollupclient_interface.go index 43541f4fea3b..30a9ccda4916 100644 --- a/op-service/sources/rollupclient_interface.go +++ b/op-service/dial/rollupclient_interface.go @@ -1,4 +1,4 @@ -package sources +package dial import ( "context" diff --git a/op-service/dial/static_l2_provider.go b/op-service/dial/static_l2_provider.go index f9a42b4c9d11..8d51f062b9af 100644 --- a/op-service/dial/static_l2_provider.go +++ b/op-service/dial/static_l2_provider.go @@ -13,7 +13,7 @@ import ( type L2EndpointProvider interface { RollupProvider // EthClient(ctx) returns the underlying ethclient pointing to the L2 execution node - EthClient(ctx context.Context) (ClientInterface, error) + EthClient(ctx context.Context) (EthClientInterface, error) } // StaticL2EndpointProvider is a L2EndpointProvider that always returns the same static RollupClient and eth client @@ -38,7 +38,7 @@ func NewStaticL2EndpointProvider(ctx context.Context, log log.Logger, ethClientU }, nil } -func (p *StaticL2EndpointProvider) EthClient(context.Context) (ClientInterface, error) { +func (p *StaticL2EndpointProvider) EthClient(context.Context) (EthClientInterface, error) { return p.ethClient, nil } diff --git a/op-service/dial/static_rollup_provider.go b/op-service/dial/static_rollup_provider.go index 7c451861cb97..a40bb667290a 100644 --- a/op-service/dial/static_rollup_provider.go +++ b/op-service/dial/static_rollup_provider.go @@ -11,7 +11,7 @@ import ( // It manages the lifecycle of the RollupClient for callers type RollupProvider interface { // RollupClient(ctx) returns the underlying sources.RollupClient pointing to the L2 rollup consensus node - RollupClient(ctx context.Context) (sources.RollupClientInterface, error) + RollupClient(ctx context.Context) (RollupClientInterface, error) // Close() closes the underlying client or clients Close() } @@ -39,7 +39,7 @@ func NewStaticL2RollupProviderFromExistingRollup(rollupCl *sources.RollupClient) }, nil } -func (p *StaticL2RollupProvider) RollupClient(context.Context) (sources.RollupClientInterface, error) { +func (p *StaticL2RollupProvider) RollupClient(context.Context) (RollupClientInterface, error) { return p.rollupClient, nil } diff --git a/op-service/testutils/mock_eth_client.go b/op-service/testutils/mock_eth_client.go index 4447c8871fad..fa8bd57d0537 100644 --- a/op-service/testutils/mock_eth_client.go +++ b/op-service/testutils/mock_eth_client.go @@ -137,3 +137,11 @@ func (m *MockEthClient) BlockByNumber(ctx context.Context, number *big.Int) (*ty func (m *MockEthClient) ExpectBlockByNumber(number *big.Int, block *types.Block, err error) { m.Mock.On("BlockByNumber", number).Once().Return(block, &err) } + +func (m *MockEthClient) ExpectClose() { + m.Mock.On("Close").Once() +} + +func (m *MockEthClient) Close() { + m.Mock.MethodCalled("Close") +} From aabbe52f6b03f69dbc7b2e11d5f0877d469b16ea Mon Sep 17 00:00:00 2001 From: EvanJRichard Date: Wed, 13 Dec 2023 16:10:44 -0500 Subject: [PATCH 12/41] op-service: restore design in Active Endpoint Providers that only keeps one client open at a time. --- op-batcher/batcher/service.go | 2 +- op-proposer/proposer/service.go | 2 +- op-service/dial/active_l2_provider.go | 93 ++++++++++++++---- op-service/dial/active_l2_provider_test.go | 104 ++++++++++++--------- op-service/dial/active_rollup_provider.go | 94 +++++++++++-------- op-service/dial/dial.go | 8 ++ 6 files changed, 201 insertions(+), 102 deletions(-) diff --git a/op-batcher/batcher/service.go b/op-batcher/batcher/service.go index 76727023aad2..e4d9d08437de 100644 --- a/op-batcher/batcher/service.go +++ b/op-batcher/batcher/service.go @@ -130,7 +130,7 @@ func (bs *BatcherService) initRPCClients(ctx context.Context, cfg *CLIConfig) er if strings.Contains(cfg.RollupRpc, ",") || strings.Contains(cfg.L2EthRpc, ",") { rollupUrls := strings.Split(cfg.RollupRpc, ",") ethUrls := strings.Split(cfg.L2EthRpc, ",") - endpointProvider, err = dial.NewActiveL2EndpointProvider(ctx, ethUrls, rollupUrls, dial.DefaultActiveSequencerFollowerCheckDuration, dial.DefaultDialTimeout, bs.Log) + endpointProvider, err = dial.NewActiveL2EndpointProvider(ctx, ethUrls, rollupUrls, dial.DefaultActiveSequencerFollowerCheckDuration, dial.DefaultDialTimeout, bs.Log, dial.DialEthClientInterfaceWithTimeout, dial.DialRollupClientInterfaceWithTimeout) } else { endpointProvider, err = dial.NewStaticL2EndpointProvider(ctx, bs.Log, cfg.L2EthRpc, cfg.RollupRpc) } diff --git a/op-proposer/proposer/service.go b/op-proposer/proposer/service.go index 20cd186af734..70328d776d7b 100644 --- a/op-proposer/proposer/service.go +++ b/op-proposer/proposer/service.go @@ -125,7 +125,7 @@ func (ps *ProposerService) initRPCClients(ctx context.Context, cfg *CLIConfig) e var rollupProvider dial.RollupProvider if strings.Contains(cfg.RollupRpc, ",") { rollupUrls := strings.Split(cfg.RollupRpc, ",") - rollupProvider, err = dial.NewActiveL2RollupProvider(ctx, rollupUrls, dial.DefaultActiveSequencerFollowerCheckDuration, dial.DefaultDialTimeout, ps.Log) + rollupProvider, err = dial.NewActiveL2RollupProvider(ctx, rollupUrls, dial.DefaultActiveSequencerFollowerCheckDuration, dial.DefaultDialTimeout, ps.Log, dial.DialRollupClientInterfaceWithTimeout) } else { rollupProvider, err = dial.NewStaticL2RollupProvider(ctx, ps.Log, cfg.RollupRpc) } diff --git a/op-service/dial/active_l2_provider.go b/op-service/dial/active_l2_provider.go index 66504a8ac3d9..c6b65db63c08 100644 --- a/op-service/dial/active_l2_provider.go +++ b/op-service/dial/active_l2_provider.go @@ -11,9 +11,12 @@ import ( const DefaultActiveSequencerFollowerCheckDuration = 2 * DefaultDialTimeout +type ethDialer func(ctx context.Context, timeout time.Duration, log log.Logger, url string) (EthClientInterface, error) + type ActiveL2EndpointProvider struct { ActiveL2RollupProvider - ethClients []EthClientInterface + currentEthClient EthClientInterface + ethDialer ethDialer } func NewActiveL2EndpointProvider( @@ -22,6 +25,8 @@ func NewActiveL2EndpointProvider( checkDuration time.Duration, networkTimeout time.Duration, logger log.Logger, + ethDialer ethDialer, + rollupDialer rollupDialer, ) (*ActiveL2EndpointProvider, error) { if len(rollupUrls) == 0 { return nil, errors.New("empty rollup urls list") @@ -30,41 +35,93 @@ func NewActiveL2EndpointProvider( return nil, errors.New("number of eth urls and rollup urls mismatch") } - rollupProvider, err := NewActiveL2RollupProvider(ctx, rollupUrls, checkDuration, networkTimeout, logger) + rollupProvider, err := NewActiveL2RollupProvider(ctx, rollupUrls, checkDuration, networkTimeout, logger, rollupDialer) if err != nil { return nil, err } cctx, cancel := context.WithTimeout(ctx, networkTimeout) defer cancel() - ethClients := make([]EthClientInterface, 0, len(ethUrls)) - for _, url := range ethUrls { - - ethClient, err := DialEthClientWithTimeout(cctx, networkTimeout, logger, url) - if err != nil { - return nil, fmt.Errorf("dialing eth client: %w", err) - } - ethClients = append(ethClients, ethClient) + ethClient, err := ethDialer(cctx, networkTimeout, logger, ethUrls[0]) + if err != nil { + return nil, fmt.Errorf("dialing eth client: %w", err) } - return &ActiveL2EndpointProvider{ ActiveL2RollupProvider: *rollupProvider, - ethClients: ethClients, + currentEthClient: ethClient, + ethDialer: ethDialer, }, nil } func (p *ActiveL2EndpointProvider) EthClient(ctx context.Context) (EthClientInterface, error) { + p.clientLock.Lock() + defer p.clientLock.Unlock() err := p.ensureActiveEndpoint(ctx) if err != nil { return nil, err } - p.clientLock.Lock() - defer p.clientLock.Unlock() - return p.ethClients[p.currentIdx], nil + + return p.currentEthClient, nil } -func (p *ActiveL2EndpointProvider) Close() { - for _, ethClient := range p.ethClients { - ethClient.Close() +func (p *ActiveL2EndpointProvider) ensureActiveEndpoint(ctx context.Context) error { + if !p.shouldCheck() { + return nil + } + + if err := p.findActiveEndpoints(ctx); err != nil { + return err + } + p.activeTimeout = time.Now().Add(p.checkDuration) + return nil +} + +func (p *ActiveL2EndpointProvider) findActiveEndpoints(ctx context.Context) error { + const maxRetries = 20 + totalAttempts := 0 + + for totalAttempts < maxRetries { + active, err := p.checkCurrentSequencer(ctx) + if err != nil { + p.log.Warn("Error querying active sequencer, trying next.", "err", err, "try", totalAttempts) + } else if active { + p.log.Debug("Current sequencer active.", "try", totalAttempts) + return nil + } else { + p.log.Info("Current sequencer inactive, trying next.", "try", totalAttempts) + } + if err := p.dialNextSequencer(ctx); err != nil { + return fmt.Errorf("dialing next sequencer: %w", err) + } + + totalAttempts++ + if p.currentIndex >= p.numEndpoints() { + p.currentIndex = 0 + } } + return fmt.Errorf("failed to find an active sequencer after %d retries", maxRetries) +} + +func (p *ActiveL2EndpointProvider) dialNextSequencer(ctx context.Context) error { + cctx, cancel := context.WithTimeout(ctx, p.networkTimeout) + defer cancel() + p.currentIndex++ + ep := p.rollupUrls[p.currentIndex] + p.log.Debug("Dialing next sequencer.", "url", ep) + rollupClient, err := p.rollupDialer(cctx, p.networkTimeout, p.log, ep) + if err != nil { + return fmt.Errorf("dialing rollup client: %w", err) + } + ethClient, err := p.ethDialer(cctx, p.networkTimeout, p.log, ep) + if err != nil { + return fmt.Errorf("dialing eth client: %w", err) + } + + p.currentRollupClient = rollupClient + p.currentEthClient = ethClient + return nil +} + +func (p *ActiveL2EndpointProvider) Close() { + p.currentEthClient.Close() p.ActiveL2RollupProvider.Close() } diff --git a/op-service/dial/active_l2_provider_test.go b/op-service/dial/active_l2_provider_test.go index 6889adaa3a93..720075f3744b 100644 --- a/op-service/dial/active_l2_provider_test.go +++ b/op-service/dial/active_l2_provider_test.go @@ -2,7 +2,7 @@ package dial import ( "context" - "sync" + "fmt" "testing" "time" @@ -12,70 +12,90 @@ import ( "github.com/stretchr/testify/require" ) -// do a test with just rollupclients -// then do a test with rollupclients and ethclients -// TestActiveSequencerFailoverBehavior tests the behavior of the ActiveSequencerProvider when the active sequencer fails +// TestActiveSequencerFailoverBehavior_RollupProvider tests that the ActiveL2RollupProvider +// will failover to the next provider if the current one is not active. func TestActiveSequencerFailoverBehavior_RollupProviders(t *testing.T) { + // Create two mock rollup clients, one of which will declare itself inactive after first check. primarySequencer := testutils.MockRollupClient{} primarySequencer.ExpectSequencerActive(true, nil) primarySequencer.ExpectSequencerActive(false, nil) secondarySequencer := testutils.MockRollupClient{} secondarySequencer.ExpectSequencerActive(true, nil) - rollupClients := []RollupClientInterface{} - rollupClients = append(rollupClients, &primarySequencer) - rollupClients = append(rollupClients, &secondarySequencer) - - endpointProvider := ActiveL2RollupProvider{ - rollupClients: rollupClients, - checkDuration: 1 * time.Duration(time.Microsecond), - networkTimeout: 1 * time.Duration(time.Second), - log: testlog.Logger(t, log.LvlDebug), - activeTimeout: time.Now(), - currentIdx: 0, - clientLock: &sync.Mutex{}, + mockRollupDialer := func(ctx context.Context, timeout time.Duration, log log.Logger, url string) (RollupClientInterface, error) { + if url == "primary" { + return &primarySequencer, nil + } else if url == "secondary" { + return &secondarySequencer, nil + } else { + return nil, fmt.Errorf("unknown test url: %s", url) + } } - _, err := endpointProvider.RollupClient(context.Background()) + + endpointProvider, err := NewActiveL2RollupProvider( + context.Background(), + []string{"primary", "secondary"}, + 1*time.Microsecond, + 1*time.Minute, + testlog.Logger(t, log.LvlDebug), + mockRollupDialer, + ) + require.NoError(t, err) + // Check that the first client is used, then the second once the first declares itself inactive. + firstSequencerUsed, err := endpointProvider.RollupClient(context.Background()) require.NoError(t, err) - require.Equal(t, 0, endpointProvider.currentIdx) - _, err = endpointProvider.RollupClient(context.Background()) + require.True(t, &primarySequencer == firstSequencerUsed) // avoids copying the struct (and its mutex, etc.) + secondSequencerUsed, err := endpointProvider.RollupClient(context.Background()) require.NoError(t, err) - require.Equal(t, 1, endpointProvider.currentIdx) + require.True(t, &secondarySequencer == secondSequencerUsed) } +// TestActiveSequencerFailoverBehavior_L2Providers tests that the ActiveL2EndpointProvider +// will failover to the next provider if the current one is not active. func TestActiveSequencerFailoverBehavior_L2Providers(t *testing.T) { + // as TestActiveSequencerFailoverBehavior_RollupProviders, + // but ensure the added `EthClient()` method also triggers the failover. primarySequencer := testutils.MockRollupClient{} primarySequencer.ExpectSequencerActive(true, nil) primarySequencer.ExpectSequencerActive(false, nil) secondarySequencer := testutils.MockRollupClient{} secondarySequencer.ExpectSequencerActive(true, nil) - rollupClients := []RollupClientInterface{} - rollupClients = append(rollupClients, &primarySequencer) - rollupClients = append(rollupClients, &secondarySequencer) - - rollupProvider := ActiveL2RollupProvider{ - rollupClients: rollupClients, - checkDuration: 1 * time.Duration(time.Microsecond), - networkTimeout: 1 * time.Duration(time.Second), - log: testlog.Logger(t, log.LvlDebug), - activeTimeout: time.Now(), - currentIdx: 0, - clientLock: &sync.Mutex{}, + mockRollupDialer := func(ctx context.Context, timeout time.Duration, log log.Logger, url string) (RollupClientInterface, error) { + if url == "primary" { + return &primarySequencer, nil + } else if url == "secondary" { + return &secondarySequencer, nil + } else { + return nil, fmt.Errorf("unknown test url: %s", url) + } } - ethClients := []EthClientInterface{} primaryEthClient := testutils.MockEthClient{} - ethClients = append(ethClients, &primaryEthClient) secondaryEthClient := testutils.MockEthClient{} - ethClients = append(ethClients, &secondaryEthClient) - endpointProvider := ActiveL2EndpointProvider{ - ActiveL2RollupProvider: rollupProvider, - ethClients: ethClients, + mockEthDialer := func(ctx context.Context, timeout time.Duration, log log.Logger, url string) (EthClientInterface, error) { + if url == "primary" { + return &primaryEthClient, nil + } else if url == "secondary" { + return &secondaryEthClient, nil + } else { + return nil, fmt.Errorf("unknown test url: %s", url) + } } - _, err := endpointProvider.EthClient(context.Background()) + endpointProvider, err := NewActiveL2EndpointProvider( + context.Background(), + []string{"primary", "secondary"}, + []string{"primary", "secondary"}, + 1*time.Microsecond, + 1*time.Minute, + testlog.Logger(t, log.LvlDebug), + mockEthDialer, + mockRollupDialer, + ) + require.NoError(t, err) + firstClientUsed, err := endpointProvider.EthClient(context.Background()) require.NoError(t, err) - require.Equal(t, 0, endpointProvider.currentIdx) - _, err = endpointProvider.EthClient(context.Background()) + require.True(t, &primaryEthClient == firstClientUsed) // avoids copying the struct (and its mutex, etc.) + secondClientUsed, err := endpointProvider.EthClient(context.Background()) require.NoError(t, err) - require.Equal(t, 1, endpointProvider.currentIdx) + require.True(t, &secondaryEthClient == secondClientUsed) } diff --git a/op-service/dial/active_rollup_provider.go b/op-service/dial/active_rollup_provider.go index eeed24421610..fa15616b8cb8 100644 --- a/op-service/dial/active_rollup_provider.go +++ b/op-service/dial/active_rollup_provider.go @@ -10,6 +10,8 @@ import ( "github.com/ethereum/go-ethereum/log" ) +type rollupDialer func(ctx context.Context, timeout time.Duration, log log.Logger, url string) (RollupClientInterface, error) + type ActiveL2RollupProvider struct { checkDuration time.Duration networkTimeout time.Duration @@ -17,9 +19,11 @@ type ActiveL2RollupProvider struct { activeTimeout time.Time - currentIdx int - rollupClients []RollupClientInterface - clientLock *sync.Mutex + rollupUrls []string + rollupDialer rollupDialer + currentRollupClient RollupClientInterface + currentIndex int + clientLock *sync.Mutex } func NewActiveL2RollupProvider( @@ -28,6 +32,7 @@ func NewActiveL2RollupProvider( checkDuration time.Duration, networkTimeout time.Duration, logger log.Logger, + dialer rollupDialer, ) (*ActiveL2RollupProvider, error) { if len(rollupUrls) == 0 { return nil, errors.New("empty rollup urls list") @@ -36,32 +41,30 @@ func NewActiveL2RollupProvider( cctx, cancel := context.WithTimeout(ctx, networkTimeout) defer cancel() - rollupClients := make([]RollupClientInterface, 0, len(rollupUrls)) - for _, url := range rollupUrls { - rollupClient, err := DialRollupClientWithTimeout(cctx, networkTimeout, logger, url) - if err != nil { - return nil, fmt.Errorf("dialing rollup client: %w", err) - } - rollupClients = append(rollupClients, rollupClient) + rollupClient, err := dialer(cctx, networkTimeout, logger, rollupUrls[0]) + if err != nil { + return nil, fmt.Errorf("dialing rollup client: %w", err) } return &ActiveL2RollupProvider{ - checkDuration: checkDuration, - networkTimeout: networkTimeout, - log: logger, - rollupClients: rollupClients, - clientLock: &sync.Mutex{}, + checkDuration: checkDuration, + networkTimeout: networkTimeout, + log: logger, + rollupUrls: rollupUrls, + rollupDialer: dialer, + currentRollupClient: rollupClient, + clientLock: &sync.Mutex{}, }, nil } func (p *ActiveL2RollupProvider) RollupClient(ctx context.Context) (RollupClientInterface, error) { + p.clientLock.Lock() + defer p.clientLock.Unlock() err := p.ensureActiveEndpoint(ctx) if err != nil { return nil, err } - p.clientLock.Lock() - defer p.clientLock.Unlock() - return p.rollupClients[p.currentIdx], nil + return p.currentRollupClient, nil } func (p *ActiveL2RollupProvider) ensureActiveEndpoint(ctx context.Context) error { @@ -81,45 +84,56 @@ func (p *ActiveL2RollupProvider) shouldCheck() bool { } func (p *ActiveL2RollupProvider) findActiveEndpoints(ctx context.Context) error { - const maxRetries = 10 - ts := time.Now() - for i := 0; i < maxRetries; i++ { - active, err := p.checkSequencer(ctx, i%p.numEndpoints()) + const maxRetries = 20 + totalAttempts := 0 + + for totalAttempts < maxRetries { + active, err := p.checkCurrentSequencer(ctx) if err != nil { - p.log.Warn("Error querying active sequencer", "err", err, "try", i) - if ctx.Err() != nil { - return fmt.Errorf("querying active sequencer: %w", err) - } + p.log.Warn("Error querying active sequencer, trying next.", "err", err, "try", totalAttempts) } else if active { - p.log.Debug("Current sequencer active", "index", i) - p.currentIdx = i + p.log.Debug("Current sequencer active.", "try", totalAttempts) return nil } else { - p.log.Info("Current sequencer inactive", "index", i) + p.log.Info("Current sequencer inactive, trying next.", "try", totalAttempts) + } + if err := p.dialNextSequencer(ctx); err != nil { + return fmt.Errorf("dialing next sequencer: %w", err) } - if i%p.numEndpoints() == 0 { - d := time.Until(ts.Add(p.checkDuration)) - time.Sleep(d) // Accepts negative duration + totalAttempts++ + if p.currentIndex >= p.numEndpoints() { + p.currentIndex = 0 } } return fmt.Errorf("failed to find an active sequencer after %d retries", maxRetries) } -func (p *ActiveL2RollupProvider) checkSequencer(ctx context.Context, idx int) (bool, error) { +func (p *ActiveL2RollupProvider) checkCurrentSequencer(ctx context.Context) (bool, error) { cctx, cancel := context.WithTimeout(ctx, p.networkTimeout) defer cancel() - active, err := p.rollupClients[idx].SequencerActive(cctx) - p.log.Info("Checked whether sequencer is active", "index", idx, "active", active, "err", err) - return active, err + return p.currentRollupClient.SequencerActive(cctx) } func (p *ActiveL2RollupProvider) numEndpoints() int { - return len(p.rollupClients) + return len(p.rollupUrls) } -func (p *ActiveL2RollupProvider) Close() { - for _, client := range p.rollupClients { - client.Close() +func (p *ActiveL2RollupProvider) dialNextSequencer(ctx context.Context) error { + cctx, cancel := context.WithTimeout(ctx, p.networkTimeout) + defer cancel() + p.currentIndex++ + ep := p.rollupUrls[p.currentIndex] + p.log.Debug("Dialing next sequencer in embedded func.", "url", ep) + rollupClient, err := p.rollupDialer(cctx, p.networkTimeout, p.log, ep) + if err != nil { + return fmt.Errorf("dialing rollup client: %w", err) } + + p.currentRollupClient = rollupClient + return nil +} + +func (p *ActiveL2RollupProvider) Close() { + p.currentRollupClient.Close() } diff --git a/op-service/dial/dial.go b/op-service/dial/dial.go index 11410197b7d3..616066f0d56f 100644 --- a/op-service/dial/dial.go +++ b/op-service/dial/dial.go @@ -33,6 +33,10 @@ func DialEthClientWithTimeout(ctx context.Context, timeout time.Duration, log lo return ethclient.NewClient(c), nil } +func DialEthClientInterfaceWithTimeout(ctx context.Context, timeout time.Duration, log log.Logger, url string) (EthClientInterface, error) { + return DialEthClientWithTimeout(ctx, timeout, log, url) +} + // DialRollupClientWithTimeout attempts to dial the RPC provider using the provided URL. // If the dial doesn't complete within timeout seconds, this method will return an error. func DialRollupClientWithTimeout(ctx context.Context, timeout time.Duration, log log.Logger, url string) (*sources.RollupClient, error) { @@ -47,6 +51,10 @@ func DialRollupClientWithTimeout(ctx context.Context, timeout time.Duration, log return sources.NewRollupClient(client.NewBaseRPCClient(rpcCl)), nil } +func DialRollupClientInterfaceWithTimeout(ctx context.Context, timeout time.Duration, log log.Logger, url string) (RollupClientInterface, error) { + return DialRollupClientWithTimeout(ctx, timeout, log, url) +} + // Dials a JSON-RPC endpoint repeatedly, with a backoff, until a client connection is established. Auth is optional. func dialRPCClientWithBackoff(ctx context.Context, log log.Logger, addr string) (*rpc.Client, error) { bOff := retry.Fixed(defaultRetryTime) From 9fb14ebfc71b0a39bba43860f0681a598433909c Mon Sep 17 00:00:00 2001 From: EvanJRichard Date: Wed, 13 Dec 2023 16:29:46 -0500 Subject: [PATCH 13/41] op-service: when dialing a new sequencer, close() the old connection. --- op-proposer/proposer/driver.go | 10 ++-------- op-service/dial/active_l2_provider.go | 8 ++++++-- op-service/dial/active_l2_provider_test.go | 3 +++ op-service/dial/active_rollup_provider.go | 6 ++++-- op-service/dial/rollupclient_interface.go | 1 + op-service/testutils/mock_rollup_client.go | 9 +++++++++ 6 files changed, 25 insertions(+), 12 deletions(-) diff --git a/op-proposer/proposer/driver.go b/op-proposer/proposer/driver.go index b15dff8096ee..6deb7fa90ea1 100644 --- a/op-proposer/proposer/driver.go +++ b/op-proposer/proposer/driver.go @@ -20,7 +20,6 @@ import ( "github.com/ethereum-optimism/optimism/op-proposer/metrics" "github.com/ethereum-optimism/optimism/op-service/dial" "github.com/ethereum-optimism/optimism/op-service/eth" - "github.com/ethereum-optimism/optimism/op-service/sources" "github.com/ethereum-optimism/optimism/op-service/txmgr" ) @@ -200,14 +199,9 @@ func (l *L2OutputSubmitter) fetchOutput(ctx context.Context, block *big.Int) (*e ctx, cancel := context.WithTimeout(ctx, l.Cfg.NetworkTimeout) defer cancel() - rollupInterface, err := l.RollupProvider.RollupClient(ctx) + rollupClient, err := l.RollupProvider.RollupClient(ctx) if err != nil { - l.Log.Error("proposer unable to get rollup interface", "err", err) - return nil, false, err - } - rollupClient, ok := rollupInterface.(*sources.RollupClient) - if !ok { - l.Log.Error("proposer unable to get rollup client", "block", block, "err", err) + l.Log.Error("proposer unable to get rollup client", "err", err) return nil, false, err } output, err := rollupClient.OutputAtBlock(ctx, block.Uint64()) diff --git a/op-service/dial/active_l2_provider.go b/op-service/dial/active_l2_provider.go index c6b65db63c08..04da84eb263c 100644 --- a/op-service/dial/active_l2_provider.go +++ b/op-service/dial/active_l2_provider.go @@ -82,12 +82,16 @@ func (p *ActiveL2EndpointProvider) findActiveEndpoints(ctx context.Context) erro for totalAttempts < maxRetries { active, err := p.checkCurrentSequencer(ctx) if err != nil { - p.log.Warn("Error querying active sequencer, trying next.", "err", err, "try", totalAttempts) + p.log.Warn("Error querying active sequencer, closing connection and trying next.", "err", err, "try", totalAttempts) + p.currentRollupClient.Close() + p.currentEthClient.Close() } else if active { p.log.Debug("Current sequencer active.", "try", totalAttempts) return nil } else { - p.log.Info("Current sequencer inactive, trying next.", "try", totalAttempts) + p.log.Info("Current sequencer inactive, closing connection and trying next.", "try", totalAttempts) + p.currentRollupClient.Close() + p.currentEthClient.Close() } if err := p.dialNextSequencer(ctx); err != nil { return fmt.Errorf("dialing next sequencer: %w", err) diff --git a/op-service/dial/active_l2_provider_test.go b/op-service/dial/active_l2_provider_test.go index 720075f3744b..e8e5839986fa 100644 --- a/op-service/dial/active_l2_provider_test.go +++ b/op-service/dial/active_l2_provider_test.go @@ -19,6 +19,7 @@ func TestActiveSequencerFailoverBehavior_RollupProviders(t *testing.T) { primarySequencer := testutils.MockRollupClient{} primarySequencer.ExpectSequencerActive(true, nil) primarySequencer.ExpectSequencerActive(false, nil) + primarySequencer.ExpectClose() secondarySequencer := testutils.MockRollupClient{} secondarySequencer.ExpectSequencerActive(true, nil) @@ -58,6 +59,7 @@ func TestActiveSequencerFailoverBehavior_L2Providers(t *testing.T) { primarySequencer := testutils.MockRollupClient{} primarySequencer.ExpectSequencerActive(true, nil) primarySequencer.ExpectSequencerActive(false, nil) + primarySequencer.ExpectClose() secondarySequencer := testutils.MockRollupClient{} secondarySequencer.ExpectSequencerActive(true, nil) @@ -71,6 +73,7 @@ func TestActiveSequencerFailoverBehavior_L2Providers(t *testing.T) { } } primaryEthClient := testutils.MockEthClient{} + primaryEthClient.ExpectClose() secondaryEthClient := testutils.MockEthClient{} mockEthDialer := func(ctx context.Context, timeout time.Duration, log log.Logger, url string) (EthClientInterface, error) { if url == "primary" { diff --git a/op-service/dial/active_rollup_provider.go b/op-service/dial/active_rollup_provider.go index fa15616b8cb8..0e44261cc7d8 100644 --- a/op-service/dial/active_rollup_provider.go +++ b/op-service/dial/active_rollup_provider.go @@ -90,12 +90,14 @@ func (p *ActiveL2RollupProvider) findActiveEndpoints(ctx context.Context) error for totalAttempts < maxRetries { active, err := p.checkCurrentSequencer(ctx) if err != nil { - p.log.Warn("Error querying active sequencer, trying next.", "err", err, "try", totalAttempts) + p.log.Warn("Error querying active sequencer, closing connection and trying next.", "err", err, "try", totalAttempts) + p.currentRollupClient.Close() } else if active { p.log.Debug("Current sequencer active.", "try", totalAttempts) return nil } else { - p.log.Info("Current sequencer inactive, trying next.", "try", totalAttempts) + p.log.Info("Current sequencer inactive, closing connection and trying next.", "try", totalAttempts) + p.currentRollupClient.Close() } if err := p.dialNextSequencer(ctx); err != nil { return fmt.Errorf("dialing next sequencer: %w", err) diff --git a/op-service/dial/rollupclient_interface.go b/op-service/dial/rollupclient_interface.go index 30a9ccda4916..46e1afdc7264 100644 --- a/op-service/dial/rollupclient_interface.go +++ b/op-service/dial/rollupclient_interface.go @@ -11,6 +11,7 @@ import ( // RollupClientInterface is an interface for providing a RollupClient // It does not describe all of the functions a RollupClient has, only the ones used by the L2 Providers and their callers type RollupClientInterface interface { + OutputAtBlock(ctx context.Context, blockNum uint64) (*eth.OutputResponse, error) SyncStatus(ctx context.Context) (*eth.SyncStatus, error) RollupConfig(ctx context.Context) (*rollup.Config, error) StartSequencer(ctx context.Context, unsafeHead common.Hash) error diff --git a/op-service/testutils/mock_rollup_client.go b/op-service/testutils/mock_rollup_client.go index dca6bcadc159..b914fd6f1603 100644 --- a/op-service/testutils/mock_rollup_client.go +++ b/op-service/testutils/mock_rollup_client.go @@ -13,6 +13,15 @@ type MockRollupClient struct { mock.Mock } +func (m *MockRollupClient) OutputAtBlock(ctx context.Context, blockNum uint64) (*eth.OutputResponse, error) { + out := m.Mock.MethodCalled("OutputAtBlock", blockNum) + return out[0].(*eth.OutputResponse), *out[1].(*error) +} + +func (m *MockRollupClient) ExpectOutputAtBlock(blockNum uint64, response *eth.OutputResponse, err error) { + m.Mock.On("OutputAtBlock", blockNum).Once().Return(response, &err) +} + func (m *MockRollupClient) SyncStatus(ctx context.Context) (*eth.SyncStatus, error) { out := m.Mock.MethodCalled("SyncStatus") return out[0].(*eth.SyncStatus), *out[1].(*error) From 6ed8d575da9af81f1ad3fe5979096dd5a1ae695b Mon Sep 17 00:00:00 2001 From: EvanJRichard Date: Wed, 13 Dec 2023 17:59:16 -0500 Subject: [PATCH 14/41] op-service: obey coderabbit suggestion around safer handling of p.currentIndex in Active L2 Providers. --- op-service/dial/active_l2_provider.go | 5 +---- op-service/dial/active_rollup_provider.go | 5 +---- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/op-service/dial/active_l2_provider.go b/op-service/dial/active_l2_provider.go index 04da84eb263c..b943830b36f1 100644 --- a/op-service/dial/active_l2_provider.go +++ b/op-service/dial/active_l2_provider.go @@ -98,9 +98,6 @@ func (p *ActiveL2EndpointProvider) findActiveEndpoints(ctx context.Context) erro } totalAttempts++ - if p.currentIndex >= p.numEndpoints() { - p.currentIndex = 0 - } } return fmt.Errorf("failed to find an active sequencer after %d retries", maxRetries) } @@ -108,7 +105,7 @@ func (p *ActiveL2EndpointProvider) findActiveEndpoints(ctx context.Context) erro func (p *ActiveL2EndpointProvider) dialNextSequencer(ctx context.Context) error { cctx, cancel := context.WithTimeout(ctx, p.networkTimeout) defer cancel() - p.currentIndex++ + p.currentIndex = (p.currentIndex + 1) % p.numEndpoints() ep := p.rollupUrls[p.currentIndex] p.log.Debug("Dialing next sequencer.", "url", ep) rollupClient, err := p.rollupDialer(cctx, p.networkTimeout, p.log, ep) diff --git a/op-service/dial/active_rollup_provider.go b/op-service/dial/active_rollup_provider.go index 0e44261cc7d8..9791adc1de2a 100644 --- a/op-service/dial/active_rollup_provider.go +++ b/op-service/dial/active_rollup_provider.go @@ -104,9 +104,6 @@ func (p *ActiveL2RollupProvider) findActiveEndpoints(ctx context.Context) error } totalAttempts++ - if p.currentIndex >= p.numEndpoints() { - p.currentIndex = 0 - } } return fmt.Errorf("failed to find an active sequencer after %d retries", maxRetries) } @@ -124,7 +121,7 @@ func (p *ActiveL2RollupProvider) numEndpoints() int { func (p *ActiveL2RollupProvider) dialNextSequencer(ctx context.Context) error { cctx, cancel := context.WithTimeout(ctx, p.networkTimeout) defer cancel() - p.currentIndex++ + p.currentIndex = (p.currentIndex + 1) % p.numEndpoints() ep := p.rollupUrls[p.currentIndex] p.log.Debug("Dialing next sequencer in embedded func.", "url", ep) rollupClient, err := p.rollupDialer(cctx, p.networkTimeout, p.log, ep) From 5396c73d7a22feb1e251fffabf312c4c108f1f8c Mon Sep 17 00:00:00 2001 From: EvanJRichard Date: Thu, 14 Dec 2023 11:49:26 -0500 Subject: [PATCH 15/41] op-service, op-batcher, op-proposer: address review comments in PR#8585. --- op-batcher/batcher/config.go | 8 +++ op-batcher/batcher/service.go | 4 +- op-proposer/proposer/service.go | 2 +- op-service/dial/active_l2_provider.go | 84 +++++++--------------- op-service/dial/active_l2_provider_test.go | 54 +++++++------- op-service/dial/active_rollup_provider.go | 32 +++++---- op-service/dial/dial.go | 8 --- op-service/testutils/mock_eth_client.go | 7 +- op-service/testutils/mock_rollup_client.go | 32 ++++----- 9 files changed, 102 insertions(+), 129 deletions(-) diff --git a/op-batcher/batcher/config.go b/op-batcher/batcher/config.go index 036d164758fc..2690a7ad9c78 100644 --- a/op-batcher/batcher/config.go +++ b/op-batcher/batcher/config.go @@ -3,6 +3,7 @@ package batcher import ( "errors" "fmt" + "strings" "time" "github.com/urfave/cli/v2" @@ -74,6 +75,13 @@ func (c *CLIConfig) Check() error { if c.RollupRpc == "" { return errors.New("empty rollup RPC URL") } + if strings.Contains(c.RollupRpc, ",") || strings.Contains(c.L2EthRpc, ",") { + rollupUrls := strings.Split(c.RollupRpc, ",") + ethUrls := strings.Split(c.L2EthRpc, ",") + if len(rollupUrls) != len(ethUrls) { + return errors.New("number of rollup and eth URLs must match") + } + } if c.PollInterval == 0 { return errors.New("must set PollInterval") } diff --git a/op-batcher/batcher/service.go b/op-batcher/batcher/service.go index e4d9d08437de..897315ecc473 100644 --- a/op-batcher/batcher/service.go +++ b/op-batcher/batcher/service.go @@ -127,10 +127,10 @@ func (bs *BatcherService) initRPCClients(ctx context.Context, cfg *CLIConfig) er bs.L1Client = l1Client var endpointProvider dial.L2EndpointProvider - if strings.Contains(cfg.RollupRpc, ",") || strings.Contains(cfg.L2EthRpc, ",") { + if strings.Contains(cfg.RollupRpc, ",") && strings.Contains(cfg.L2EthRpc, ",") { rollupUrls := strings.Split(cfg.RollupRpc, ",") ethUrls := strings.Split(cfg.L2EthRpc, ",") - endpointProvider, err = dial.NewActiveL2EndpointProvider(ctx, ethUrls, rollupUrls, dial.DefaultActiveSequencerFollowerCheckDuration, dial.DefaultDialTimeout, bs.Log, dial.DialEthClientInterfaceWithTimeout, dial.DialRollupClientInterfaceWithTimeout) + endpointProvider, err = dial.NewActiveL2EndpointProvider(ctx, ethUrls, rollupUrls, dial.DefaultActiveSequencerFollowerCheckDuration, dial.DefaultDialTimeout, bs.Log) } else { endpointProvider, err = dial.NewStaticL2EndpointProvider(ctx, bs.Log, cfg.L2EthRpc, cfg.RollupRpc) } diff --git a/op-proposer/proposer/service.go b/op-proposer/proposer/service.go index 70328d776d7b..20cd186af734 100644 --- a/op-proposer/proposer/service.go +++ b/op-proposer/proposer/service.go @@ -125,7 +125,7 @@ func (ps *ProposerService) initRPCClients(ctx context.Context, cfg *CLIConfig) e var rollupProvider dial.RollupProvider if strings.Contains(cfg.RollupRpc, ",") { rollupUrls := strings.Split(cfg.RollupRpc, ",") - rollupProvider, err = dial.NewActiveL2RollupProvider(ctx, rollupUrls, dial.DefaultActiveSequencerFollowerCheckDuration, dial.DefaultDialTimeout, ps.Log, dial.DialRollupClientInterfaceWithTimeout) + rollupProvider, err = dial.NewActiveL2RollupProvider(ctx, rollupUrls, dial.DefaultActiveSequencerFollowerCheckDuration, dial.DefaultDialTimeout, ps.Log) } else { rollupProvider, err = dial.NewStaticL2RollupProvider(ctx, ps.Log, cfg.RollupRpc) } diff --git a/op-service/dial/active_l2_provider.go b/op-service/dial/active_l2_provider.go index b943830b36f1..34988ee6caa4 100644 --- a/op-service/dial/active_l2_provider.go +++ b/op-service/dial/active_l2_provider.go @@ -17,9 +17,20 @@ type ActiveL2EndpointProvider struct { ActiveL2RollupProvider currentEthClient EthClientInterface ethDialer ethDialer + ethUrls []string } -func NewActiveL2EndpointProvider( +func NewActiveL2EndpointProvider(ctx context.Context, ethUrls, rollupUrls []string, checkDuration time.Duration, networkTimeout time.Duration, logger log.Logger) (*ActiveL2EndpointProvider, error) { + dialEthClientInterfaceWithTimeout := func(ctx context.Context, timeout time.Duration, log log.Logger, url string) (EthClientInterface, error) { + return DialEthClientWithTimeout(ctx, timeout, log, url) + } + dialRollupClientInterfaceWithTimeout := func(ctx context.Context, timeout time.Duration, log log.Logger, url string) (RollupClientInterface, error) { + return DialRollupClientWithTimeout(ctx, timeout, log, url) + } + return newActiveL2EndpointProvider(ctx, ethUrls, rollupUrls, checkDuration, networkTimeout, logger, dialEthClientInterfaceWithTimeout, dialRollupClientInterfaceWithTimeout) +} + +func newActiveL2EndpointProvider( ctx context.Context, ethUrls, rollupUrls []string, checkDuration time.Duration, @@ -35,7 +46,7 @@ func NewActiveL2EndpointProvider( return nil, errors.New("number of eth urls and rollup urls mismatch") } - rollupProvider, err := NewActiveL2RollupProvider(ctx, rollupUrls, checkDuration, networkTimeout, logger, rollupDialer) + rollupProvider, err := newActiveL2RollupProvider(ctx, rollupUrls, checkDuration, networkTimeout, logger, rollupDialer) if err != nil { return nil, err } @@ -49,77 +60,30 @@ func NewActiveL2EndpointProvider( ActiveL2RollupProvider: *rollupProvider, currentEthClient: ethClient, ethDialer: ethDialer, + ethUrls: ethUrls, }, nil } func (p *ActiveL2EndpointProvider) EthClient(ctx context.Context) (EthClientInterface, error) { p.clientLock.Lock() defer p.clientLock.Unlock() + indexBeforeCheck := p.currentIndex err := p.ensureActiveEndpoint(ctx) if err != nil { return nil, err } - - return p.currentEthClient, nil -} - -func (p *ActiveL2EndpointProvider) ensureActiveEndpoint(ctx context.Context) error { - if !p.shouldCheck() { - return nil - } - - if err := p.findActiveEndpoints(ctx); err != nil { - return err - } - p.activeTimeout = time.Now().Add(p.checkDuration) - return nil -} - -func (p *ActiveL2EndpointProvider) findActiveEndpoints(ctx context.Context) error { - const maxRetries = 20 - totalAttempts := 0 - - for totalAttempts < maxRetries { - active, err := p.checkCurrentSequencer(ctx) + if indexBeforeCheck != p.currentIndex { + // we changed sequencers, dial a new EthClient + cctx, cancel := context.WithTimeout(ctx, p.networkTimeout) + defer cancel() + ep := p.ethUrls[p.currentIndex] + ethClient, err := p.ethDialer(cctx, p.networkTimeout, p.log, ep) if err != nil { - p.log.Warn("Error querying active sequencer, closing connection and trying next.", "err", err, "try", totalAttempts) - p.currentRollupClient.Close() - p.currentEthClient.Close() - } else if active { - p.log.Debug("Current sequencer active.", "try", totalAttempts) - return nil - } else { - p.log.Info("Current sequencer inactive, closing connection and trying next.", "try", totalAttempts) - p.currentRollupClient.Close() - p.currentEthClient.Close() - } - if err := p.dialNextSequencer(ctx); err != nil { - return fmt.Errorf("dialing next sequencer: %w", err) + return nil, fmt.Errorf("dialing eth client: %w", err) } - - totalAttempts++ - } - return fmt.Errorf("failed to find an active sequencer after %d retries", maxRetries) -} - -func (p *ActiveL2EndpointProvider) dialNextSequencer(ctx context.Context) error { - cctx, cancel := context.WithTimeout(ctx, p.networkTimeout) - defer cancel() - p.currentIndex = (p.currentIndex + 1) % p.numEndpoints() - ep := p.rollupUrls[p.currentIndex] - p.log.Debug("Dialing next sequencer.", "url", ep) - rollupClient, err := p.rollupDialer(cctx, p.networkTimeout, p.log, ep) - if err != nil { - return fmt.Errorf("dialing rollup client: %w", err) - } - ethClient, err := p.ethDialer(cctx, p.networkTimeout, p.log, ep) - if err != nil { - return fmt.Errorf("dialing eth client: %w", err) + p.currentEthClient = ethClient } - - p.currentRollupClient = rollupClient - p.currentEthClient = ethClient - return nil + return p.currentEthClient, nil } func (p *ActiveL2EndpointProvider) Close() { diff --git a/op-service/dial/active_l2_provider_test.go b/op-service/dial/active_l2_provider_test.go index e8e5839986fa..1020d0194b16 100644 --- a/op-service/dial/active_l2_provider_test.go +++ b/op-service/dial/active_l2_provider_test.go @@ -16,26 +16,26 @@ import ( // will failover to the next provider if the current one is not active. func TestActiveSequencerFailoverBehavior_RollupProviders(t *testing.T) { // Create two mock rollup clients, one of which will declare itself inactive after first check. - primarySequencer := testutils.MockRollupClient{} + primarySequencer := new(testutils.MockRollupClient) primarySequencer.ExpectSequencerActive(true, nil) primarySequencer.ExpectSequencerActive(false, nil) primarySequencer.ExpectClose() - secondarySequencer := testutils.MockRollupClient{} + secondarySequencer := new(testutils.MockRollupClient) secondarySequencer.ExpectSequencerActive(true, nil) mockRollupDialer := func(ctx context.Context, timeout time.Duration, log log.Logger, url string) (RollupClientInterface, error) { - if url == "primary" { - return &primarySequencer, nil - } else if url == "secondary" { - return &secondarySequencer, nil + if url == "primaryRollup" { + return primarySequencer, nil + } else if url == "secondaryRollup" { + return secondarySequencer, nil } else { return nil, fmt.Errorf("unknown test url: %s", url) } } - endpointProvider, err := NewActiveL2RollupProvider( + endpointProvider, err := newActiveL2RollupProvider( context.Background(), - []string{"primary", "secondary"}, + []string{"primaryRollup", "secondaryRollup"}, 1*time.Microsecond, 1*time.Minute, testlog.Logger(t, log.LvlDebug), @@ -45,10 +45,10 @@ func TestActiveSequencerFailoverBehavior_RollupProviders(t *testing.T) { // Check that the first client is used, then the second once the first declares itself inactive. firstSequencerUsed, err := endpointProvider.RollupClient(context.Background()) require.NoError(t, err) - require.True(t, &primarySequencer == firstSequencerUsed) // avoids copying the struct (and its mutex, etc.) + require.Same(t, primarySequencer, firstSequencerUsed) secondSequencerUsed, err := endpointProvider.RollupClient(context.Background()) require.NoError(t, err) - require.True(t, &secondarySequencer == secondSequencerUsed) + require.Same(t, secondarySequencer, secondSequencerUsed) } // TestActiveSequencerFailoverBehavior_L2Providers tests that the ActiveL2EndpointProvider @@ -56,38 +56,38 @@ func TestActiveSequencerFailoverBehavior_RollupProviders(t *testing.T) { func TestActiveSequencerFailoverBehavior_L2Providers(t *testing.T) { // as TestActiveSequencerFailoverBehavior_RollupProviders, // but ensure the added `EthClient()` method also triggers the failover. - primarySequencer := testutils.MockRollupClient{} + primarySequencer := new(testutils.MockRollupClient) primarySequencer.ExpectSequencerActive(true, nil) primarySequencer.ExpectSequencerActive(false, nil) primarySequencer.ExpectClose() - secondarySequencer := testutils.MockRollupClient{} + secondarySequencer := new(testutils.MockRollupClient) secondarySequencer.ExpectSequencerActive(true, nil) mockRollupDialer := func(ctx context.Context, timeout time.Duration, log log.Logger, url string) (RollupClientInterface, error) { - if url == "primary" { - return &primarySequencer, nil - } else if url == "secondary" { - return &secondarySequencer, nil + if url == "primaryRollup" { + return primarySequencer, nil + } else if url == "secondaryRollup" { + return secondarySequencer, nil } else { return nil, fmt.Errorf("unknown test url: %s", url) } } - primaryEthClient := testutils.MockEthClient{} + primaryEthClient := new(testutils.MockEthClient) primaryEthClient.ExpectClose() - secondaryEthClient := testutils.MockEthClient{} + secondaryEthClient := new(testutils.MockEthClient) mockEthDialer := func(ctx context.Context, timeout time.Duration, log log.Logger, url string) (EthClientInterface, error) { - if url == "primary" { - return &primaryEthClient, nil - } else if url == "secondary" { - return &secondaryEthClient, nil + if url == "primaryEth" { + return primaryEthClient, nil + } else if url == "secondaryEth" { + return secondaryEthClient, nil } else { return nil, fmt.Errorf("unknown test url: %s", url) } } - endpointProvider, err := NewActiveL2EndpointProvider( + endpointProvider, err := newActiveL2EndpointProvider( context.Background(), - []string{"primary", "secondary"}, - []string{"primary", "secondary"}, + []string{"primaryEth", "secondaryEth"}, + []string{"primaryRollup", "secondaryRollup"}, 1*time.Microsecond, 1*time.Minute, testlog.Logger(t, log.LvlDebug), @@ -97,8 +97,8 @@ func TestActiveSequencerFailoverBehavior_L2Providers(t *testing.T) { require.NoError(t, err) firstClientUsed, err := endpointProvider.EthClient(context.Background()) require.NoError(t, err) - require.True(t, &primaryEthClient == firstClientUsed) // avoids copying the struct (and its mutex, etc.) + require.Same(t, primaryEthClient, firstClientUsed) // avoids copying the struct (and its mutex, etc.) secondClientUsed, err := endpointProvider.EthClient(context.Background()) require.NoError(t, err) - require.True(t, &secondaryEthClient == secondClientUsed) + require.Same(t, secondaryEthClient, secondClientUsed) } diff --git a/op-service/dial/active_rollup_provider.go b/op-service/dial/active_rollup_provider.go index 9791adc1de2a..a7d57a132478 100644 --- a/op-service/dial/active_rollup_provider.go +++ b/op-service/dial/active_rollup_provider.go @@ -32,6 +32,19 @@ func NewActiveL2RollupProvider( checkDuration time.Duration, networkTimeout time.Duration, logger log.Logger, +) (*ActiveL2RollupProvider, error) { + dialRollupClientInterfaceWithTimeout := func(ctx context.Context, timeout time.Duration, log log.Logger, url string) (RollupClientInterface, error) { + return DialRollupClientWithTimeout(ctx, timeout, log, url) + } + return newActiveL2RollupProvider(ctx, rollupUrls, checkDuration, networkTimeout, logger, dialRollupClientInterfaceWithTimeout) +} + +func newActiveL2RollupProvider( + ctx context.Context, + rollupUrls []string, + checkDuration time.Duration, + networkTimeout time.Duration, + logger log.Logger, dialer rollupDialer, ) (*ActiveL2RollupProvider, error) { if len(rollupUrls) == 0 { @@ -84,28 +97,23 @@ func (p *ActiveL2RollupProvider) shouldCheck() bool { } func (p *ActiveL2RollupProvider) findActiveEndpoints(ctx context.Context) error { - const maxRetries = 20 - totalAttempts := 0 - - for totalAttempts < maxRetries { + for attempt := range p.rollupUrls { active, err := p.checkCurrentSequencer(ctx) if err != nil { - p.log.Warn("Error querying active sequencer, closing connection and trying next.", "err", err, "try", totalAttempts) + p.log.Warn("Error querying active sequencer, closing connection and trying next.", "err", err, "try", attempt) p.currentRollupClient.Close() } else if active { - p.log.Debug("Current sequencer active.", "try", totalAttempts) + p.log.Debug("Current sequencer active.", "try", attempt) return nil } else { - p.log.Info("Current sequencer inactive, closing connection and trying next.", "try", totalAttempts) + p.log.Info("Current sequencer inactive, closing connection and trying next.", "try", attempt) p.currentRollupClient.Close() } if err := p.dialNextSequencer(ctx); err != nil { return fmt.Errorf("dialing next sequencer: %w", err) } - - totalAttempts++ } - return fmt.Errorf("failed to find an active sequencer after %d retries", maxRetries) + return fmt.Errorf("failed to find an active sequencer, tried following urls: %v", p.rollupUrls) } func (p *ActiveL2RollupProvider) checkCurrentSequencer(ctx context.Context) (bool, error) { @@ -121,14 +129,14 @@ func (p *ActiveL2RollupProvider) numEndpoints() int { func (p *ActiveL2RollupProvider) dialNextSequencer(ctx context.Context) error { cctx, cancel := context.WithTimeout(ctx, p.networkTimeout) defer cancel() + p.currentIndex = (p.currentIndex + 1) % p.numEndpoints() ep := p.rollupUrls[p.currentIndex] - p.log.Debug("Dialing next sequencer in embedded func.", "url", ep) + rollupClient, err := p.rollupDialer(cctx, p.networkTimeout, p.log, ep) if err != nil { return fmt.Errorf("dialing rollup client: %w", err) } - p.currentRollupClient = rollupClient return nil } diff --git a/op-service/dial/dial.go b/op-service/dial/dial.go index 616066f0d56f..11410197b7d3 100644 --- a/op-service/dial/dial.go +++ b/op-service/dial/dial.go @@ -33,10 +33,6 @@ func DialEthClientWithTimeout(ctx context.Context, timeout time.Duration, log lo return ethclient.NewClient(c), nil } -func DialEthClientInterfaceWithTimeout(ctx context.Context, timeout time.Duration, log log.Logger, url string) (EthClientInterface, error) { - return DialEthClientWithTimeout(ctx, timeout, log, url) -} - // DialRollupClientWithTimeout attempts to dial the RPC provider using the provided URL. // If the dial doesn't complete within timeout seconds, this method will return an error. func DialRollupClientWithTimeout(ctx context.Context, timeout time.Duration, log log.Logger, url string) (*sources.RollupClient, error) { @@ -51,10 +47,6 @@ func DialRollupClientWithTimeout(ctx context.Context, timeout time.Duration, log return sources.NewRollupClient(client.NewBaseRPCClient(rpcCl)), nil } -func DialRollupClientInterfaceWithTimeout(ctx context.Context, timeout time.Duration, log log.Logger, url string) (RollupClientInterface, error) { - return DialRollupClientWithTimeout(ctx, timeout, log, url) -} - // Dials a JSON-RPC endpoint repeatedly, with a backoff, until a client connection is established. Auth is optional. func dialRPCClientWithBackoff(ctx context.Context, log log.Logger, addr string) (*rpc.Client, error) { bOff := retry.Fixed(defaultRetryTime) diff --git a/op-service/testutils/mock_eth_client.go b/op-service/testutils/mock_eth_client.go index fa8bd57d0537..8c2ef6f3015c 100644 --- a/op-service/testutils/mock_eth_client.go +++ b/op-service/testutils/mock_eth_client.go @@ -131,11 +131,12 @@ func (m *MockEthClient) ExpectReadStorageAt(ctx context.Context, address common. } func (m *MockEthClient) BlockByNumber(ctx context.Context, number *big.Int) (*types.Block, error) { - return m.Mock.MethodCalled(("BlockByNumber"), number).Get(0).(*types.Block), nil + out := m.Mock.Called(number) + return out.Get(0).(*types.Block), out.Error(1) } func (m *MockEthClient) ExpectBlockByNumber(number *big.Int, block *types.Block, err error) { - m.Mock.On("BlockByNumber", number).Once().Return(block, &err) + m.Mock.On("BlockByNumber", number).Once().Return(block, err) } func (m *MockEthClient) ExpectClose() { @@ -143,5 +144,5 @@ func (m *MockEthClient) ExpectClose() { } func (m *MockEthClient) Close() { - m.Mock.MethodCalled("Close") + m.Mock.Called() } diff --git a/op-service/testutils/mock_rollup_client.go b/op-service/testutils/mock_rollup_client.go index b914fd6f1603..a6e79c996ad3 100644 --- a/op-service/testutils/mock_rollup_client.go +++ b/op-service/testutils/mock_rollup_client.go @@ -14,48 +14,48 @@ type MockRollupClient struct { } func (m *MockRollupClient) OutputAtBlock(ctx context.Context, blockNum uint64) (*eth.OutputResponse, error) { - out := m.Mock.MethodCalled("OutputAtBlock", blockNum) - return out[0].(*eth.OutputResponse), *out[1].(*error) + out := m.Mock.Called(blockNum) + return out.Get(0).(*eth.OutputResponse), out.Error(1) } func (m *MockRollupClient) ExpectOutputAtBlock(blockNum uint64, response *eth.OutputResponse, err error) { - m.Mock.On("OutputAtBlock", blockNum).Once().Return(response, &err) + m.Mock.On("OutputAtBlock", blockNum).Once().Return(response, err) } func (m *MockRollupClient) SyncStatus(ctx context.Context) (*eth.SyncStatus, error) { - out := m.Mock.MethodCalled("SyncStatus") - return out[0].(*eth.SyncStatus), *out[1].(*error) + out := m.Mock.Called() + return out.Get(0).(*eth.SyncStatus), out.Error(1) } func (m *MockRollupClient) ExpectSyncStatus(status *eth.SyncStatus, err error) { - m.Mock.On("SyncStatus").Once().Return(status, &err) + m.Mock.On("SyncStatus").Once().Return(status, err) } func (m *MockRollupClient) RollupConfig(ctx context.Context) (*rollup.Config, error) { - out := m.Mock.MethodCalled("RollupConfig") - return out[0].(*rollup.Config), *out[1].(*error) + out := m.Mock.Called() + return out.Get(0).(*rollup.Config), out.Error(1) } func (m *MockRollupClient) ExpectRollupConfig(config *rollup.Config, err error) { - m.Mock.On("RollupConfig").Once().Return(config, &err) + m.Mock.On("RollupConfig").Once().Return(config, err) } func (m *MockRollupClient) StartSequencer(ctx context.Context, unsafeHead common.Hash) error { - out := m.Mock.MethodCalled("StartSequencer", unsafeHead) - return *out[0].(*error) + out := m.Mock.Called(unsafeHead) + return out.Error(0) } func (m *MockRollupClient) ExpectStartSequencer(unsafeHead common.Hash, err error) { - m.Mock.On("StartSequencer", unsafeHead).Once().Return(&err) + m.Mock.On("StartSequencer", unsafeHead).Once().Return(err) } func (m *MockRollupClient) SequencerActive(ctx context.Context) (bool, error) { - out := m.Mock.MethodCalled("SequencerActive") - return out[0].(bool), *out[1].(*error) + out := m.Mock.Called() + return out.Bool(0), out.Error(1) } func (m *MockRollupClient) ExpectSequencerActive(active bool, err error) { - m.Mock.On("SequencerActive").Once().Return(active, &err) + m.Mock.On("SequencerActive").Once().Return(active, err) } func (m *MockRollupClient) ExpectClose() { @@ -63,5 +63,5 @@ func (m *MockRollupClient) ExpectClose() { } func (m *MockRollupClient) Close() { - m.Mock.MethodCalled("Close") + m.Mock.Called() } From 999b2710cfb4f3f89b524bcd8b676a4f8d31278e Mon Sep 17 00:00:00 2001 From: EvanJRichard Date: Thu, 14 Dec 2023 15:39:59 -0500 Subject: [PATCH 16/41] op-service: Active L2 Provider - add test case for a sequencer returning an error. --- op-service/dial/active_l2_provider_test.go | 43 +++++++++++++++++++++- 1 file changed, 41 insertions(+), 2 deletions(-) diff --git a/op-service/dial/active_l2_provider_test.go b/op-service/dial/active_l2_provider_test.go index 1020d0194b16..f4984edfafe4 100644 --- a/op-service/dial/active_l2_provider_test.go +++ b/op-service/dial/active_l2_provider_test.go @@ -14,7 +14,7 @@ import ( // TestActiveSequencerFailoverBehavior_RollupProvider tests that the ActiveL2RollupProvider // will failover to the next provider if the current one is not active. -func TestActiveSequencerFailoverBehavior_RollupProviders(t *testing.T) { +func TestActiveSequencerFailoverBehavior_RollupProviders_Inactive(t *testing.T) { // Create two mock rollup clients, one of which will declare itself inactive after first check. primarySequencer := new(testutils.MockRollupClient) primarySequencer.ExpectSequencerActive(true, nil) @@ -53,7 +53,7 @@ func TestActiveSequencerFailoverBehavior_RollupProviders(t *testing.T) { // TestActiveSequencerFailoverBehavior_L2Providers tests that the ActiveL2EndpointProvider // will failover to the next provider if the current one is not active. -func TestActiveSequencerFailoverBehavior_L2Providers(t *testing.T) { +func TestActiveSequencerFailoverBehavior_L2Providers_Inactive(t *testing.T) { // as TestActiveSequencerFailoverBehavior_RollupProviders, // but ensure the added `EthClient()` method also triggers the failover. primarySequencer := new(testutils.MockRollupClient) @@ -102,3 +102,42 @@ func TestActiveSequencerFailoverBehavior_L2Providers(t *testing.T) { require.NoError(t, err) require.Same(t, secondaryEthClient, secondClientUsed) } + +// TestActiveSequencerFailoverBehavior_RollupProvider tests that the ActiveL2RollupProvider +// will failover to the next provider if the current one is not active. +func TestActiveSequencerFailoverBehavior_RollupProviders_Errored(t *testing.T) { + // Create two mock rollup clients, one of which will error out after first check + primarySequencer := new(testutils.MockRollupClient) + primarySequencer.ExpectSequencerActive(true, nil) + primarySequencer.ExpectSequencerActive(true, fmt.Errorf("a test error")) + primarySequencer.ExpectClose() + secondarySequencer := new(testutils.MockRollupClient) + secondarySequencer.ExpectSequencerActive(true, nil) + + mockRollupDialer := func(ctx context.Context, timeout time.Duration, log log.Logger, url string) (RollupClientInterface, error) { + if url == "primaryRollup" { + return primarySequencer, nil + } else if url == "secondaryRollup" { + return secondarySequencer, nil + } else { + return nil, fmt.Errorf("unknown test url: %s", url) + } + } + + endpointProvider, err := newActiveL2RollupProvider( + context.Background(), + []string{"primaryRollup", "secondaryRollup"}, + 1*time.Microsecond, + 1*time.Minute, + testlog.Logger(t, log.LvlDebug), + mockRollupDialer, + ) + require.NoError(t, err) + // Check that the first client is used, then the second once the first declares itself inactive. + firstSequencerUsed, err := endpointProvider.RollupClient(context.Background()) + require.NoError(t, err) + require.Same(t, primarySequencer, firstSequencerUsed) + secondSequencerUsed, err := endpointProvider.RollupClient(context.Background()) + require.NoError(t, err) + require.Same(t, secondarySequencer, secondSequencerUsed) +} From 3ca276d0f510f952053f5a861b7a81a25c7196da Mon Sep 17 00:00:00 2001 From: EvanJRichard Date: Thu, 14 Dec 2023 16:36:43 -0500 Subject: [PATCH 17/41] op-service: Active L2/Rollup Providers: improve unit testing and logging. --- op-service/dial/active_l2_provider.go | 2 + op-service/dial/active_l2_provider_test.go | 62 ++++++++++++++++++++-- op-service/dial/active_rollup_provider.go | 11 ++-- 3 files changed, 65 insertions(+), 10 deletions(-) diff --git a/op-service/dial/active_l2_provider.go b/op-service/dial/active_l2_provider.go index 34988ee6caa4..1bdf92b140cf 100644 --- a/op-service/dial/active_l2_provider.go +++ b/op-service/dial/active_l2_provider.go @@ -77,10 +77,12 @@ func (p *ActiveL2EndpointProvider) EthClient(ctx context.Context) (EthClientInte cctx, cancel := context.WithTimeout(ctx, p.networkTimeout) defer cancel() ep := p.ethUrls[p.currentIndex] + log.Info("sequencer changed, dialing new eth client", "new_index", p.currentIndex, "new_url", ep) ethClient, err := p.ethDialer(cctx, p.networkTimeout, p.log, ep) if err != nil { return nil, fmt.Errorf("dialing eth client: %w", err) } + p.currentEthClient.Close() p.currentEthClient = ethClient } return p.currentEthClient, nil diff --git a/op-service/dial/active_l2_provider_test.go b/op-service/dial/active_l2_provider_test.go index f4984edfafe4..2e0110e0acdc 100644 --- a/op-service/dial/active_l2_provider_test.go +++ b/op-service/dial/active_l2_provider_test.go @@ -12,7 +12,7 @@ import ( "github.com/stretchr/testify/require" ) -// TestActiveSequencerFailoverBehavior_RollupProvider tests that the ActiveL2RollupProvider +// TestActiveSequencerFailoverBehavior_RollupProviders_Inactive tests that the ActiveL2RollupProvider // will failover to the next provider if the current one is not active. func TestActiveSequencerFailoverBehavior_RollupProviders_Inactive(t *testing.T) { // Create two mock rollup clients, one of which will declare itself inactive after first check. @@ -51,7 +51,7 @@ func TestActiveSequencerFailoverBehavior_RollupProviders_Inactive(t *testing.T) require.Same(t, secondarySequencer, secondSequencerUsed) } -// TestActiveSequencerFailoverBehavior_L2Providers tests that the ActiveL2EndpointProvider +// TestActiveSequencerFailoverBehavior_L2Providers_Inactive tests that the ActiveL2EndpointProvider // will failover to the next provider if the current one is not active. func TestActiveSequencerFailoverBehavior_L2Providers_Inactive(t *testing.T) { // as TestActiveSequencerFailoverBehavior_RollupProviders, @@ -97,14 +97,14 @@ func TestActiveSequencerFailoverBehavior_L2Providers_Inactive(t *testing.T) { require.NoError(t, err) firstClientUsed, err := endpointProvider.EthClient(context.Background()) require.NoError(t, err) - require.Same(t, primaryEthClient, firstClientUsed) // avoids copying the struct (and its mutex, etc.) + require.Same(t, primaryEthClient, firstClientUsed) secondClientUsed, err := endpointProvider.EthClient(context.Background()) require.NoError(t, err) require.Same(t, secondaryEthClient, secondClientUsed) } -// TestActiveSequencerFailoverBehavior_RollupProvider tests that the ActiveL2RollupProvider -// will failover to the next provider if the current one is not active. +// TestActiveSequencerFailoverBehavior_RollupProviders_Errored is as the _Inactive test, +// but with the first provider returning an error instead of declaring itself inactive. func TestActiveSequencerFailoverBehavior_RollupProviders_Errored(t *testing.T) { // Create two mock rollup clients, one of which will error out after first check primarySequencer := new(testutils.MockRollupClient) @@ -141,3 +141,55 @@ func TestActiveSequencerFailoverBehavior_RollupProviders_Errored(t *testing.T) { require.NoError(t, err) require.Same(t, secondarySequencer, secondSequencerUsed) } + +// TestActiveSequencerFailoverBehavior_L2Providers_Errored is as the _Inactive test, +// but with the first provider returning an error instead of declaring itself inactive. +func TestActiveSequencerFailoverBehavior_L2Providers_Errored(t *testing.T) { + // as TestActiveSequencerFailoverBehavior_RollupProviders, + // but ensure the added `EthClient()` method also triggers the failover. + primarySequencer := new(testutils.MockRollupClient) + primarySequencer.ExpectSequencerActive(true, nil) + primarySequencer.ExpectSequencerActive(false, fmt.Errorf("a test error")) + primarySequencer.ExpectClose() + secondarySequencer := new(testutils.MockRollupClient) + secondarySequencer.ExpectSequencerActive(true, nil) + + mockRollupDialer := func(ctx context.Context, timeout time.Duration, log log.Logger, url string) (RollupClientInterface, error) { + if url == "primaryRollup" { + return primarySequencer, nil + } else if url == "secondaryRollup" { + return secondarySequencer, nil + } else { + return nil, fmt.Errorf("unknown test url: %s", url) + } + } + primaryEthClient := new(testutils.MockEthClient) + primaryEthClient.ExpectClose() + secondaryEthClient := new(testutils.MockEthClient) + mockEthDialer := func(ctx context.Context, timeout time.Duration, log log.Logger, url string) (EthClientInterface, error) { + if url == "primaryEth" { + return primaryEthClient, nil + } else if url == "secondaryEth" { + return secondaryEthClient, nil + } else { + return nil, fmt.Errorf("unknown test url: %s", url) + } + } + endpointProvider, err := newActiveL2EndpointProvider( + context.Background(), + []string{"primaryEth", "secondaryEth"}, + []string{"primaryRollup", "secondaryRollup"}, + 1*time.Microsecond, + 1*time.Minute, + testlog.Logger(t, log.LvlDebug), + mockEthDialer, + mockRollupDialer, + ) + require.NoError(t, err) + firstClientUsed, err := endpointProvider.EthClient(context.Background()) + require.NoError(t, err) + require.Same(t, primaryEthClient, firstClientUsed) + secondClientUsed, err := endpointProvider.EthClient(context.Background()) + require.NoError(t, err) + require.Same(t, secondaryEthClient, secondClientUsed) +} diff --git a/op-service/dial/active_rollup_provider.go b/op-service/dial/active_rollup_provider.go index a7d57a132478..f3bfadd770b8 100644 --- a/op-service/dial/active_rollup_provider.go +++ b/op-service/dial/active_rollup_provider.go @@ -97,16 +97,17 @@ func (p *ActiveL2RollupProvider) shouldCheck() bool { } func (p *ActiveL2RollupProvider) findActiveEndpoints(ctx context.Context) error { - for attempt := range p.rollupUrls { + for index := range p.rollupUrls { active, err := p.checkCurrentSequencer(ctx) + ep := p.rollupUrls[p.currentIndex] if err != nil { - p.log.Warn("Error querying active sequencer, closing connection and trying next.", "err", err, "try", attempt) + p.log.Warn("Error querying active sequencer, closing connection and trying next.", "err", err, "index", index, "url", ep) p.currentRollupClient.Close() } else if active { - p.log.Debug("Current sequencer active.", "try", attempt) + p.log.Debug("Current sequencer active.", "index", index, "url", ep) return nil } else { - p.log.Info("Current sequencer inactive, closing connection and trying next.", "try", attempt) + p.log.Info("Current sequencer inactive, closing connection and trying next.", "index", index, "url", ep) p.currentRollupClient.Close() } if err := p.dialNextSequencer(ctx); err != nil { @@ -132,7 +133,7 @@ func (p *ActiveL2RollupProvider) dialNextSequencer(ctx context.Context) error { p.currentIndex = (p.currentIndex + 1) % p.numEndpoints() ep := p.rollupUrls[p.currentIndex] - + p.log.Info("Dialing next sequencer.", "index", p.currentIndex, "url", ep) rollupClient, err := p.rollupDialer(cctx, p.networkTimeout, p.log, ep) if err != nil { return fmt.Errorf("dialing rollup client: %w", err) From 1efffd3bed0683f62f2de3065f2405005c75ddef Mon Sep 17 00:00:00 2001 From: EvanJRichard Date: Fri, 15 Dec 2023 16:36:23 -0500 Subject: [PATCH 18/41] op-service, op-batcher: address review comments in 8585 regarding first-startup behavior and testing. --- op-batcher/batcher/config.go | 8 +- op-service/dial/active_l2_provider.go | 44 +- op-service/dial/active_l2_provider_test.go | 543 ++++++++++++++++----- op-service/dial/active_rollup_provider.go | 60 ++- 4 files changed, 470 insertions(+), 185 deletions(-) diff --git a/op-batcher/batcher/config.go b/op-batcher/batcher/config.go index 2690a7ad9c78..65805af9bb97 100644 --- a/op-batcher/batcher/config.go +++ b/op-batcher/batcher/config.go @@ -75,12 +75,8 @@ func (c *CLIConfig) Check() error { if c.RollupRpc == "" { return errors.New("empty rollup RPC URL") } - if strings.Contains(c.RollupRpc, ",") || strings.Contains(c.L2EthRpc, ",") { - rollupUrls := strings.Split(c.RollupRpc, ",") - ethUrls := strings.Split(c.L2EthRpc, ",") - if len(rollupUrls) != len(ethUrls) { - return errors.New("number of rollup and eth URLs must match") - } + if strings.Count(c.RollupRpc, ",") != strings.Count(c.L2EthRpc, ",") { + return errors.New("number of rollup and eth URLs must match") } if c.PollInterval == 0 { return errors.New("must set PollInterval") diff --git a/op-service/dial/active_l2_provider.go b/op-service/dial/active_l2_provider.go index 1bdf92b140cf..b50ca388d16b 100644 --- a/op-service/dial/active_l2_provider.go +++ b/op-service/dial/active_l2_provider.go @@ -16,18 +16,26 @@ type ethDialer func(ctx context.Context, timeout time.Duration, log log.Logger, type ActiveL2EndpointProvider struct { ActiveL2RollupProvider currentEthClient EthClientInterface + ethClientIndex int ethDialer ethDialer ethUrls []string } -func NewActiveL2EndpointProvider(ctx context.Context, ethUrls, rollupUrls []string, checkDuration time.Duration, networkTimeout time.Duration, logger log.Logger) (*ActiveL2EndpointProvider, error) { - dialEthClientInterfaceWithTimeout := func(ctx context.Context, timeout time.Duration, log log.Logger, url string) (EthClientInterface, error) { +func NewActiveL2EndpointProvider(ctx context.Context, + ethUrls, rollupUrls []string, + checkDuration time.Duration, + networkTimeout time.Duration, + logger log.Logger, +) (*ActiveL2EndpointProvider, error) { + ethDialer := func(ctx context.Context, timeout time.Duration, + log log.Logger, url string) (EthClientInterface, error) { return DialEthClientWithTimeout(ctx, timeout, log, url) } - dialRollupClientInterfaceWithTimeout := func(ctx context.Context, timeout time.Duration, log log.Logger, url string) (RollupClientInterface, error) { + rollupDialer := func(ctx context.Context, timeout time.Duration, + log log.Logger, url string) (RollupClientInterface, error) { return DialRollupClientWithTimeout(ctx, timeout, log, url) } - return newActiveL2EndpointProvider(ctx, ethUrls, rollupUrls, checkDuration, networkTimeout, logger, dialEthClientInterfaceWithTimeout, dialRollupClientInterfaceWithTimeout) + return newActiveL2EndpointProvider(ctx, ethUrls, rollupUrls, checkDuration, networkTimeout, logger, ethDialer, rollupDialer) } func newActiveL2EndpointProvider( @@ -50,39 +58,41 @@ func newActiveL2EndpointProvider( if err != nil { return nil, err } + ep := &ActiveL2EndpointProvider{ + ActiveL2RollupProvider: *rollupProvider, + ethDialer: ethDialer, + ethUrls: ethUrls, + } cctx, cancel := context.WithTimeout(ctx, networkTimeout) defer cancel() - ethClient, err := ethDialer(cctx, networkTimeout, logger, ethUrls[0]) + _, err = ep.EthClient(cctx) if err != nil { return nil, fmt.Errorf("dialing eth client: %w", err) } - return &ActiveL2EndpointProvider{ - ActiveL2RollupProvider: *rollupProvider, - currentEthClient: ethClient, - ethDialer: ethDialer, - ethUrls: ethUrls, - }, nil + return ep, nil } func (p *ActiveL2EndpointProvider) EthClient(ctx context.Context) (EthClientInterface, error) { p.clientLock.Lock() defer p.clientLock.Unlock() - indexBeforeCheck := p.currentIndex err := p.ensureActiveEndpoint(ctx) if err != nil { return nil, err } - if indexBeforeCheck != p.currentIndex { - // we changed sequencers, dial a new EthClient + if p.ethClientIndex != p.rollupIndex || p.currentEthClient == nil { + // we changed sequencers (or the provider was just created), dial a new EthClient cctx, cancel := context.WithTimeout(ctx, p.networkTimeout) defer cancel() - ep := p.ethUrls[p.currentIndex] - log.Info("sequencer changed, dialing new eth client", "new_index", p.currentIndex, "new_url", ep) + p.ethClientIndex = p.rollupIndex + ep := p.ethUrls[p.ethClientIndex] + log.Info("sequencer changed, dialing new eth client", "new_index", p.rollupIndex, "new_url", ep) ethClient, err := p.ethDialer(cctx, p.networkTimeout, p.log, ep) if err != nil { return nil, fmt.Errorf("dialing eth client: %w", err) } - p.currentEthClient.Close() + if p.currentEthClient != nil { + p.currentEthClient.Close() + } p.currentEthClient = ethClient } return p.currentEthClient, nil diff --git a/op-service/dial/active_l2_provider_test.go b/op-service/dial/active_l2_provider_test.go index 2e0110e0acdc..13dfef92c74d 100644 --- a/op-service/dial/active_l2_provider_test.go +++ b/op-service/dial/active_l2_provider_test.go @@ -12,184 +12,457 @@ import ( "github.com/stretchr/testify/require" ) -// TestActiveSequencerFailoverBehavior_RollupProviders_Inactive tests that the ActiveL2RollupProvider -// will failover to the next provider if the current one is not active. -func TestActiveSequencerFailoverBehavior_RollupProviders_Inactive(t *testing.T) { - // Create two mock rollup clients, one of which will declare itself inactive after first check. - primarySequencer := new(testutils.MockRollupClient) - primarySequencer.ExpectSequencerActive(true, nil) - primarySequencer.ExpectSequencerActive(false, nil) - primarySequencer.ExpectClose() - secondarySequencer := new(testutils.MockRollupClient) - secondarySequencer.ExpectSequencerActive(true, nil) +// endpointProviderTest is a test harness for setting up endpoint provider tests. +type endpointProviderTest struct { + t *testing.T + rollupClients []*testutils.MockRollupClient + ethClients []*testutils.MockEthClient +} - mockRollupDialer := func(ctx context.Context, timeout time.Duration, log log.Logger, url string) (RollupClientInterface, error) { - if url == "primaryRollup" { - return primarySequencer, nil - } else if url == "secondaryRollup" { - return secondarySequencer, nil - } else { - return nil, fmt.Errorf("unknown test url: %s", url) +// setupEndpointProviderTest sets up the basic structure of the endpoint provider tests. +func setupEndpointProviderTest(t *testing.T, numSequencers int) *endpointProviderTest { + ept := &endpointProviderTest{ + t: t, + rollupClients: make([]*testutils.MockRollupClient, numSequencers), + ethClients: make([]*testutils.MockEthClient, numSequencers), + } + + for i := 0; i < numSequencers; i++ { + ept.rollupClients[i] = new(testutils.MockRollupClient) + ept.ethClients[i] = new(testutils.MockEthClient) + } + + return ept +} + +// newActiveL2EndpointProvider constructs a new ActiveL2RollupProvider using the test harness setup. +func (et *endpointProviderTest) newActiveL2RollupProvider(checkDuration time.Duration) (*ActiveL2RollupProvider, error) { + rollupDialer := func(ctx context.Context, timeout time.Duration, log log.Logger, url string) (RollupClientInterface, error) { + for i, client := range et.rollupClients { + if url == fmt.Sprintf("rollup%d", i) { + return client, nil + } } + return nil, fmt.Errorf("unknown test url: %s", url) } - endpointProvider, err := newActiveL2RollupProvider( + // make the "URLs" + rollupUrls := make([]string, len(et.rollupClients)) + for i := range et.rollupClients { + rollupUrl := fmt.Sprintf("rollup%d", i) + rollupUrls[i] = rollupUrl + } + + return newActiveL2RollupProvider( context.Background(), - []string{"primaryRollup", "secondaryRollup"}, - 1*time.Microsecond, + rollupUrls, + checkDuration, 1*time.Minute, - testlog.Logger(t, log.LvlDebug), - mockRollupDialer, + testlog.Logger(et.t, log.LvlDebug), + rollupDialer, ) - require.NoError(t, err) - // Check that the first client is used, then the second once the first declares itself inactive. - firstSequencerUsed, err := endpointProvider.RollupClient(context.Background()) - require.NoError(t, err) - require.Same(t, primarySequencer, firstSequencerUsed) - secondSequencerUsed, err := endpointProvider.RollupClient(context.Background()) - require.NoError(t, err) - require.Same(t, secondarySequencer, secondSequencerUsed) } -// TestActiveSequencerFailoverBehavior_L2Providers_Inactive tests that the ActiveL2EndpointProvider -// will failover to the next provider if the current one is not active. -func TestActiveSequencerFailoverBehavior_L2Providers_Inactive(t *testing.T) { - // as TestActiveSequencerFailoverBehavior_RollupProviders, - // but ensure the added `EthClient()` method also triggers the failover. - primarySequencer := new(testutils.MockRollupClient) - primarySequencer.ExpectSequencerActive(true, nil) - primarySequencer.ExpectSequencerActive(false, nil) - primarySequencer.ExpectClose() - secondarySequencer := new(testutils.MockRollupClient) - secondarySequencer.ExpectSequencerActive(true, nil) - +// newActiveL2EndpointProvider constructs a new ActiveL2EndpointProvider using the test harness setup. +func (et *endpointProviderTest) newActiveL2EndpointProvider(checkDuration time.Duration) (*ActiveL2EndpointProvider, error) { mockRollupDialer := func(ctx context.Context, timeout time.Duration, log log.Logger, url string) (RollupClientInterface, error) { - if url == "primaryRollup" { - return primarySequencer, nil - } else if url == "secondaryRollup" { - return secondarySequencer, nil - } else { - return nil, fmt.Errorf("unknown test url: %s", url) + for i, client := range et.rollupClients { + if url == fmt.Sprintf("rollup%d", i) { + return client, nil + } } + return nil, fmt.Errorf("unknown test url: %s", url) } - primaryEthClient := new(testutils.MockEthClient) - primaryEthClient.ExpectClose() - secondaryEthClient := new(testutils.MockEthClient) + mockEthDialer := func(ctx context.Context, timeout time.Duration, log log.Logger, url string) (EthClientInterface, error) { - if url == "primaryEth" { - return primaryEthClient, nil - } else if url == "secondaryEth" { - return secondaryEthClient, nil - } else { - return nil, fmt.Errorf("unknown test url: %s", url) + for i, client := range et.ethClients { + if url == fmt.Sprintf("eth%d", i) { + return client, nil + } } + return nil, fmt.Errorf("unknown test url: %s", url) } - endpointProvider, err := newActiveL2EndpointProvider( + + // make the "URLs" + rollupUrls := make([]string, len(et.rollupClients)) + for i := range et.rollupClients { + rollupUrl := fmt.Sprintf("rollup%d", i) + rollupUrls[i] = rollupUrl + } + ethUrls := make([]string, len(et.ethClients)) + for i := range et.ethClients { + ethUrl := fmt.Sprintf("eth%d", i) + ethUrls[i] = ethUrl + } + + return newActiveL2EndpointProvider( context.Background(), - []string{"primaryEth", "secondaryEth"}, - []string{"primaryRollup", "secondaryRollup"}, - 1*time.Microsecond, + ethUrls, + rollupUrls, + checkDuration, 1*time.Minute, - testlog.Logger(t, log.LvlDebug), + testlog.Logger(et.t, log.LvlDebug), mockEthDialer, mockRollupDialer, ) +} + +// TestRollupProvider_FailoverOnInactiveSequencer verifies that the ActiveL2RollupProvider +// will switch to the next provider if the current one becomes inactive. +func TestRollupProvider_FailoverOnInactiveSequencer(t *testing.T) { + ept := setupEndpointProviderTest(t, 2) + primarySequencer, secondarySequencer := ept.rollupClients[0], ept.rollupClients[1] + + primarySequencer.ExpectSequencerActive(true, nil) // respond true once on creation + primarySequencer.ExpectSequencerActive(true, nil) // respond true again when the test calls `RollupClient()` the first time + + shortCheckDuration := 1 * time.Microsecond + rollupProvider, err := ept.newActiveL2RollupProvider(shortCheckDuration) require.NoError(t, err) - firstClientUsed, err := endpointProvider.EthClient(context.Background()) + + firstSequencerUsed, err := rollupProvider.RollupClient(context.Background()) require.NoError(t, err) - require.Same(t, primaryEthClient, firstClientUsed) - secondClientUsed, err := endpointProvider.EthClient(context.Background()) + require.Same(t, primarySequencer, firstSequencerUsed) + + primarySequencer.ExpectSequencerActive(false, nil) // become inactive after that + primarySequencer.ExpectClose() + secondarySequencer.ExpectSequencerActive(true, nil) + secondSequencerUsed, err := rollupProvider.RollupClient(context.Background()) require.NoError(t, err) - require.Same(t, secondaryEthClient, secondClientUsed) + require.Same(t, secondarySequencer, secondSequencerUsed) } -// TestActiveSequencerFailoverBehavior_RollupProviders_Errored is as the _Inactive test, -// but with the first provider returning an error instead of declaring itself inactive. -func TestActiveSequencerFailoverBehavior_RollupProviders_Errored(t *testing.T) { - // Create two mock rollup clients, one of which will error out after first check - primarySequencer := new(testutils.MockRollupClient) - primarySequencer.ExpectSequencerActive(true, nil) - primarySequencer.ExpectSequencerActive(true, fmt.Errorf("a test error")) +// TestEndpointProvider_FailoverOnInactiveSequencer verifies that the ActiveL2EndpointProvider +// will switch to the next provider if the current one becomes inactive. +func TestEndpointProvider_FailoverOnInactiveSequencer(t *testing.T) { + // as TestActiveSequencerFailoverBehavior_RollupProviders, + // but ensure the added `EthClient()` method also triggers the failover. + ept := setupEndpointProviderTest(t, 2) + primarySequencer, secondarySequencer := ept.rollupClients[0], ept.rollupClients[1] + primarySequencer.ExpectSequencerActive(true, nil) // primary sequencer gets hit once on creation: embedded call of `RollupClient()` + primarySequencer.ExpectSequencerActive(true, nil) // primary sequencer gets hit twice on creation: implicit call of `EthClient()` + primarySequencer.ExpectSequencerActive(true, nil) // respond true again when the test calls `EthClient()` the first time + + shortCheckDuration := 1 * time.Microsecond + activeProvider, err := ept.newActiveL2EndpointProvider(shortCheckDuration) + require.NoError(t, err) + + firstSequencerUsed, err := activeProvider.EthClient(context.Background()) + require.NoError(t, err) + require.Same(t, ept.ethClients[0], firstSequencerUsed) + + primarySequencer.ExpectSequencerActive(false, nil) // become inactive after that + secondarySequencer.ExpectSequencerActive(true, nil) + primarySequencer.ExpectClose() + ept.ethClients[0].ExpectClose() // we close the ethclient when we switch over to the next sequencer + secondSequencerUsed, err := activeProvider.EthClient(context.Background()) + require.NoError(t, err) + require.Same(t, ept.ethClients[1], secondSequencerUsed) +} + +// TestRollupProvider_FailoverOnErroredSequencer verifies that the ActiveL2RollupProvider +// will switch to the next provider if the current one returns an error. +func TestRollupProvider_FailoverOnErroredSequencer(t *testing.T) { + ept := setupEndpointProviderTest(t, 2) + primarySequencer, secondarySequencer := ept.rollupClients[0], ept.rollupClients[1] + + primarySequencer.ExpectSequencerActive(true, nil) // respond true once on creation + primarySequencer.ExpectSequencerActive(true, nil) // respond true again when the test calls `RollupClient()` the first time + + shortCheckDuration := 1 * time.Microsecond + rollupProvider, err := ept.newActiveL2RollupProvider(shortCheckDuration) + require.NoError(t, err) + + firstSequencerUsed, err := rollupProvider.RollupClient(context.Background()) + require.NoError(t, err) + require.Same(t, primarySequencer, firstSequencerUsed) + + primarySequencer.ExpectSequencerActive(true, fmt.Errorf("a test error")) // error-out after that primarySequencer.ExpectClose() - secondarySequencer := new(testutils.MockRollupClient) secondarySequencer.ExpectSequencerActive(true, nil) + secondSequencerUsed, err := rollupProvider.RollupClient(context.Background()) + require.NoError(t, err) + require.Same(t, secondarySequencer, secondSequencerUsed) +} - mockRollupDialer := func(ctx context.Context, timeout time.Duration, log log.Logger, url string) (RollupClientInterface, error) { - if url == "primaryRollup" { - return primarySequencer, nil - } else if url == "secondaryRollup" { - return secondarySequencer, nil - } else { - return nil, fmt.Errorf("unknown test url: %s", url) - } - } +// TestEndpointProvider_FailoverOnErroredSequencer verifies that the ActiveL2EndpointProvider +// will switch to the next provider if the current one returns an error. +func TestEndpointProvider_FailoverOnErroredSequencer(t *testing.T) { + ept := setupEndpointProviderTest(t, 2) + primarySequencer, secondarySequencer := ept.rollupClients[0], ept.rollupClients[1] + primaryEthClient, secondaryEthClient := ept.ethClients[0], ept.ethClients[1] - endpointProvider, err := newActiveL2RollupProvider( - context.Background(), - []string{"primaryRollup", "secondaryRollup"}, - 1*time.Microsecond, - 1*time.Minute, - testlog.Logger(t, log.LvlDebug), - mockRollupDialer, - ) + primarySequencer.ExpectSequencerActive(true, nil) // primary sequencer gets hit once on creation: embedded call of `RollupClient()` + primarySequencer.ExpectSequencerActive(true, nil) // primary sequencer gets hit twice on creation: implicit call of `EthClient()` + + shortCheckDuration := 1 * time.Microsecond + activeProvider, err := ept.newActiveL2EndpointProvider(shortCheckDuration) + require.NoError(t, err) + + primarySequencer.ExpectSequencerActive(true, nil) // respond true again when the test calls `EthClient()` the first time + firstSequencerUsed, err := activeProvider.EthClient(context.Background()) + require.NoError(t, err) + require.Same(t, primaryEthClient, firstSequencerUsed) + + primarySequencer.ExpectSequencerActive(true, fmt.Errorf("a test error")) // error out after that + primarySequencer.ExpectClose() + primaryEthClient.ExpectClose() + secondarySequencer.ExpectSequencerActive(true, nil) + + secondSequencerUsed, err := activeProvider.EthClient(context.Background()) require.NoError(t, err) - // Check that the first client is used, then the second once the first declares itself inactive. - firstSequencerUsed, err := endpointProvider.RollupClient(context.Background()) + require.Same(t, secondaryEthClient, secondSequencerUsed) +} + +// TestRollupProvider_NoExtraCheckOnActiveSequencer verifies that the ActiveL2RollupProvider +// does not change if the current sequencer is active. +func TestRollupProvider_NoExtraCheckOnActiveSequencer(t *testing.T) { + ept := setupEndpointProviderTest(t, 2) + primarySequencer := ept.rollupClients[0] + + primarySequencer.ExpectSequencerActive(true, nil) // default test provider, which always checks, checks Active on creation + + shortCheckDuration := 1 * time.Microsecond + rollupProvider, err := ept.newActiveL2RollupProvider(shortCheckDuration) + require.NoError(t, err) + require.Same(t, primarySequencer, rollupProvider.currentRollupClient) + + primarySequencer.ExpectSequencerActive(true, nil) // default test provider, which always checks, checks again on RollupClient() + + firstSequencerUsed, err := rollupProvider.RollupClient(context.Background()) require.NoError(t, err) require.Same(t, primarySequencer, firstSequencerUsed) - secondSequencerUsed, err := endpointProvider.RollupClient(context.Background()) +} + +// TestEndpointProvider_NoExtraCheckOnActiveSequencer verifies that the ActiveL2EndpointProvider +// does not change if the current sequencer is active. +func TestEndpointProvider_NoExtraCheckOnActiveSequencer(t *testing.T) { + ept := setupEndpointProviderTest(t, 2) + primarySequencer := ept.rollupClients[0] + + primarySequencer.ExpectSequencerActive(true, nil) // default test provider, which always checks, checks Active twice on creation (once for internal RollupClient() call) + primarySequencer.ExpectSequencerActive(true, nil) // default test provider, which always checks, checks Active twice on creation (once for internal EthClient() call) + + shortCheckDuration := 1 * time.Microsecond + endpointProvider, err := ept.newActiveL2EndpointProvider(shortCheckDuration) + require.NoError(t, err) + require.Same(t, ept.ethClients[0], endpointProvider.currentEthClient) + + primarySequencer.ExpectSequencerActive(true, nil) // default test provider, which always checks, checks again on EthClient() + + firstEthClientUsed, err := endpointProvider.EthClient(context.Background()) + require.NoError(t, err) + require.Same(t, ept.ethClients[0], firstEthClientUsed) +} + +// TestRollupProvider_FailoverAndReturn verifies the ActiveL2RollupProvider's ability to +// failover and then return to the primary sequencer once it becomes active again. +func TestRollupProvider_FailoverAndReturn(t *testing.T) { + ept := setupEndpointProviderTest(t, 2) + primarySequencer, secondarySequencer := ept.rollupClients[0], ept.rollupClients[1] + + // Primary initially active + primarySequencer.ExpectSequencerActive(true, nil) + shortCheckDuration := 1 * time.Microsecond + rollupProvider, err := ept.newActiveL2RollupProvider(shortCheckDuration) + require.NoError(t, err) + + // Primary becomes inactive, secondary active + primarySequencer.ExpectSequencerActive(false, nil) + primarySequencer.ExpectClose() + secondarySequencer.ExpectSequencerActive(true, nil) + + // Fails over to secondary + secondSequencerUsed, err := rollupProvider.RollupClient(context.Background()) require.NoError(t, err) require.Same(t, secondarySequencer, secondSequencerUsed) + + // Primary becomes active again, secondary becomes inactive + primarySequencer.ExpectSequencerActive(true, nil) + secondarySequencer.ExpectSequencerActive(false, nil) + secondarySequencer.ExpectClose() + + // Should return to primary + thirdSequencerUsed, err := rollupProvider.RollupClient(context.Background()) + require.NoError(t, err) + require.Same(t, primarySequencer, thirdSequencerUsed) } -// TestActiveSequencerFailoverBehavior_L2Providers_Errored is as the _Inactive test, -// but with the first provider returning an error instead of declaring itself inactive. -func TestActiveSequencerFailoverBehavior_L2Providers_Errored(t *testing.T) { - // as TestActiveSequencerFailoverBehavior_RollupProviders, - // but ensure the added `EthClient()` method also triggers the failover. - primarySequencer := new(testutils.MockRollupClient) +// TestEndpointProvider_FailoverAndReturn verifies the ActiveL2EndpointProvider's ability to +// failover and then return to the primary sequencer once it becomes active again. +func TestEndpointProvider_FailoverAndReturn(t *testing.T) { + ept := setupEndpointProviderTest(t, 2) + primarySequencer, secondarySequencer := ept.rollupClients[0], ept.rollupClients[1] + + // Primary initially active primarySequencer.ExpectSequencerActive(true, nil) - primarySequencer.ExpectSequencerActive(false, fmt.Errorf("a test error")) + primarySequencer.ExpectSequencerActive(true, nil) // see comment in other tests about why we expect this twice + shortCheckDuration := 1 * time.Microsecond + endpointProvider, err := ept.newActiveL2EndpointProvider(shortCheckDuration) + require.NoError(t, err) + + // Primary becomes inactive, secondary active + primarySequencer.ExpectSequencerActive(false, nil) primarySequencer.ExpectClose() - secondarySequencer := new(testutils.MockRollupClient) + ept.ethClients[0].ExpectClose() secondarySequencer.ExpectSequencerActive(true, nil) - mockRollupDialer := func(ctx context.Context, timeout time.Duration, log log.Logger, url string) (RollupClientInterface, error) { - if url == "primaryRollup" { - return primarySequencer, nil - } else if url == "secondaryRollup" { - return secondarySequencer, nil - } else { - return nil, fmt.Errorf("unknown test url: %s", url) - } + // Fails over to secondary + secondEthClient, err := endpointProvider.EthClient(context.Background()) + require.NoError(t, err) + require.Same(t, ept.ethClients[1], secondEthClient) + + // Primary becomes active again, secondary becomes inactive + primarySequencer.ExpectSequencerActive(true, nil) + secondarySequencer.ExpectSequencerActive(false, nil) + secondarySequencer.ExpectClose() + ept.ethClients[1].ExpectClose() + + // // Should return to primary + thirdSequencerUsed, err := endpointProvider.EthClient(context.Background()) + require.NoError(t, err) + require.Same(t, ept.ethClients[0], thirdSequencerUsed) +} + +// TestRollupProvider_InitialActiveSequencerSelection verifies that the ActiveL2RollupProvider +// selects the active sequencer correctly at the time of creation. +func TestRollupProvider_InitialActiveSequencerSelection(t *testing.T) { + ept := setupEndpointProviderTest(t, 2) + primarySequencer := ept.rollupClients[0] + + // Primary active at creation + primarySequencer.ExpectSequencerActive(true, nil) + + shortCheckDuration := 1 * time.Microsecond + rollupProvider, err := ept.newActiveL2RollupProvider(shortCheckDuration) + require.NoError(t, err) + + // Check immediately after creation without additional Active check + require.Same(t, primarySequencer, rollupProvider.currentRollupClient) +} + +// TestEndpointProvider_InitialActiveSequencerSelection verifies that the ActiveL2EndpointProvider +// selects the active sequencer correctly at the time of creation. +func TestEndpointProvider_InitialActiveSequencerSelection(t *testing.T) { + ept := setupEndpointProviderTest(t, 2) + primarySequencer := ept.rollupClients[0] + + // Primary active at creation + primarySequencer.ExpectSequencerActive(true, nil) + primarySequencer.ExpectSequencerActive(true, nil) // see comment in other tests about why we expect this twice + + shortCheckDuration := 1 * time.Microsecond + rollupProvider, err := ept.newActiveL2EndpointProvider(shortCheckDuration) + require.NoError(t, err) + + // Check immediately after creation without additional Active check + require.Same(t, primarySequencer, rollupProvider.currentRollupClient) +} + +// TestRollupProvider_FailOnAllInactiveSequencers verifies that the ActiveL2RollupProvider +// fails to be created when all sequencers are inactive. +func TestRollupProvider_FailOnAllInactiveSequencers(t *testing.T) { + ept := setupEndpointProviderTest(t, 2) + + // All sequencers are inactive + for _, sequencer := range ept.rollupClients { + sequencer.ExpectSequencerActive(false, nil) + sequencer.ExpectClose() } - primaryEthClient := new(testutils.MockEthClient) - primaryEthClient.ExpectClose() - secondaryEthClient := new(testutils.MockEthClient) - mockEthDialer := func(ctx context.Context, timeout time.Duration, log log.Logger, url string) (EthClientInterface, error) { - if url == "primaryEth" { - return primaryEthClient, nil - } else if url == "secondaryEth" { - return secondaryEthClient, nil - } else { - return nil, fmt.Errorf("unknown test url: %s", url) - } + + shortCheckDuration := 1 * time.Microsecond + _, err := ept.newActiveL2RollupProvider(shortCheckDuration) + require.Error(t, err) // Expect an error as all sequencers are inactive +} + +// TestEndpointProvider_FailOnAllInactiveSequencers verifies that the ActiveL2EndpointProvider +// fails to be created when all sequencers are inactive. +func TestEndpointProvider_FailOnAllInactiveSequencers(t *testing.T) { + ept := setupEndpointProviderTest(t, 2) + ept.rollupClients[0].ExpectSequencerActive(false, nil) // see comment in other tests about why we expect this an extra time + + // All sequencers are inactive + for _, sequencer := range ept.rollupClients { + sequencer.ExpectSequencerActive(false, nil) + sequencer.ExpectClose() } - endpointProvider, err := newActiveL2EndpointProvider( - context.Background(), - []string{"primaryEth", "secondaryEth"}, - []string{"primaryRollup", "secondaryRollup"}, - 1*time.Microsecond, - 1*time.Minute, - testlog.Logger(t, log.LvlDebug), - mockEthDialer, - mockRollupDialer, - ) + + shortCheckDuration := 1 * time.Microsecond + _, err := ept.newActiveL2EndpointProvider(shortCheckDuration) + require.Error(t, err) // Expect an error as all sequencers are inactive +} + +// TestRollupProvider_FailOnAllErroredSequencers verifies that the ActiveL2RollupProvider +// fails to create when all sequencers return an error. +func TestRollupProvider_FailOnAllErroredSequencers(t *testing.T) { + ept := setupEndpointProviderTest(t, 2) + + // All sequencers are inactive + for _, sequencer := range ept.rollupClients { + sequencer.ExpectSequencerActive(true, fmt.Errorf("a test error")) + sequencer.ExpectClose() + } + + shortCheckDuration := 1 * time.Microsecond + _, err := ept.newActiveL2RollupProvider(shortCheckDuration) + require.Error(t, err) // Expect an error as all sequencers are inactive +} + +// TestEndpointProvider_FailOnAllErroredSequencers verifies that the ActiveL2EndpointProvider +// fails to create when all sequencers return an error. +func TestEndpointProvider_FailOnAllErroredSequencers(t *testing.T) { + ept := setupEndpointProviderTest(t, 2) + ept.rollupClients[0].ExpectSequencerActive(true, fmt.Errorf("a test error")) // see comment in other tests about why we expect this an extra time + + // All sequencers are inactive + for _, sequencer := range ept.rollupClients { + sequencer.ExpectSequencerActive(true, fmt.Errorf("a test error")) + sequencer.ExpectClose() + } + + shortCheckDuration := 1 * time.Microsecond + _, err := ept.newActiveL2EndpointProvider(shortCheckDuration) + require.Error(t, err) // Expect an error as all sequencers are inactive +} + +// TestRollupProvider_LongCheckDuration verifies the behavior of ActiveL2RollupProvider with a long check duration. +func TestRollupProvider_LongCheckDuration(t *testing.T) { + ept := setupEndpointProviderTest(t, 2) + primarySequencer := ept.rollupClients[0] + + longCheckDuration := 1 * time.Hour + primarySequencer.ExpectSequencerActive(true, nil) // Active check on creation + + rollupProvider, err := ept.newActiveL2RollupProvider(longCheckDuration) + require.NoError(t, err) + + // Should return the same client without extra checks + firstSequencerUsed, err := rollupProvider.RollupClient(context.Background()) require.NoError(t, err) - firstClientUsed, err := endpointProvider.EthClient(context.Background()) + require.Same(t, primarySequencer, firstSequencerUsed) + + secondSequencerUsed, err := rollupProvider.RollupClient(context.Background()) require.NoError(t, err) - require.Same(t, primaryEthClient, firstClientUsed) - secondClientUsed, err := endpointProvider.EthClient(context.Background()) + require.Same(t, primarySequencer, secondSequencerUsed) +} + +// TestEndpointProvider_LongCheckDuration verifies the behavior of ActiveL2EndpointProvider with a long check duration. +func TestEndpointProvider_LongCheckDuration(t *testing.T) { + ept := setupEndpointProviderTest(t, 2) + primarySequencer := ept.rollupClients[0] + + longCheckDuration := 1 * time.Hour + primarySequencer.ExpectSequencerActive(true, nil) // Active check on creation + + endpointProvider, err := ept.newActiveL2EndpointProvider(longCheckDuration) + require.NoError(t, err) + + // Should return the same client without extra checks + firstEthClient, err := endpointProvider.EthClient(context.Background()) + require.NoError(t, err) + require.Same(t, ept.ethClients[0], firstEthClient) + + secondEthClient, err := endpointProvider.EthClient(context.Background()) require.NoError(t, err) - require.Same(t, secondaryEthClient, secondClientUsed) + require.Same(t, ept.ethClients[0], secondEthClient) } diff --git a/op-service/dial/active_rollup_provider.go b/op-service/dial/active_rollup_provider.go index f3bfadd770b8..7f0ecbab18e2 100644 --- a/op-service/dial/active_rollup_provider.go +++ b/op-service/dial/active_rollup_provider.go @@ -22,7 +22,7 @@ type ActiveL2RollupProvider struct { rollupUrls []string rollupDialer rollupDialer currentRollupClient RollupClientInterface - currentIndex int + rollupIndex int clientLock *sync.Mutex } @@ -33,10 +33,11 @@ func NewActiveL2RollupProvider( networkTimeout time.Duration, logger log.Logger, ) (*ActiveL2RollupProvider, error) { - dialRollupClientInterfaceWithTimeout := func(ctx context.Context, timeout time.Duration, log log.Logger, url string) (RollupClientInterface, error) { + rollupDialer := func(ctx context.Context, timeout time.Duration, + log log.Logger, url string) (RollupClientInterface, error) { return DialRollupClientWithTimeout(ctx, timeout, log, url) } - return newActiveL2RollupProvider(ctx, rollupUrls, checkDuration, networkTimeout, logger, dialRollupClientInterfaceWithTimeout) + return newActiveL2RollupProvider(ctx, rollupUrls, checkDuration, networkTimeout, logger, rollupDialer) } func newActiveL2RollupProvider( @@ -50,29 +51,36 @@ func newActiveL2RollupProvider( if len(rollupUrls) == 0 { return nil, errors.New("empty rollup urls list") } - + rp := &ActiveL2RollupProvider{ + checkDuration: checkDuration, + networkTimeout: networkTimeout, + log: logger, + rollupUrls: rollupUrls, + rollupDialer: dialer, + clientLock: &sync.Mutex{}, + } cctx, cancel := context.WithTimeout(ctx, networkTimeout) defer cancel() - - rollupClient, err := dialer(cctx, networkTimeout, logger, rollupUrls[0]) + _, err := rp.RollupClient(cctx) if err != nil { return nil, fmt.Errorf("dialing rollup client: %w", err) } - - return &ActiveL2RollupProvider{ - checkDuration: checkDuration, - networkTimeout: networkTimeout, - log: logger, - rollupUrls: rollupUrls, - rollupDialer: dialer, - currentRollupClient: rollupClient, - clientLock: &sync.Mutex{}, - }, nil + return rp, nil } func (p *ActiveL2RollupProvider) RollupClient(ctx context.Context) (RollupClientInterface, error) { p.clientLock.Lock() defer p.clientLock.Unlock() + if p.currentRollupClient == nil { + cctx, cancel := context.WithTimeout(ctx, p.networkTimeout) + defer cancel() + ep := p.rollupUrls[p.rollupIndex] + rollupClient, err := p.rollupDialer(cctx, p.networkTimeout, p.log, ep) + if err != nil { + return nil, fmt.Errorf("dialing rollup client: %w", err) + } + p.currentRollupClient = rollupClient + } err := p.ensureActiveEndpoint(ctx) if err != nil { return nil, err @@ -84,7 +92,6 @@ func (p *ActiveL2RollupProvider) ensureActiveEndpoint(ctx context.Context) error if !p.shouldCheck() { return nil } - if err := p.findActiveEndpoints(ctx); err != nil { return err } @@ -97,18 +104,16 @@ func (p *ActiveL2RollupProvider) shouldCheck() bool { } func (p *ActiveL2RollupProvider) findActiveEndpoints(ctx context.Context) error { - for index := range p.rollupUrls { + for range p.rollupUrls { active, err := p.checkCurrentSequencer(ctx) - ep := p.rollupUrls[p.currentIndex] + ep := p.rollupUrls[p.rollupIndex] if err != nil { - p.log.Warn("Error querying active sequencer, closing connection and trying next.", "err", err, "index", index, "url", ep) - p.currentRollupClient.Close() + p.log.Warn("Error querying active sequencer, closing connection and trying next.", "err", err, "offset", p.rollupIndex, "url", ep) } else if active { - p.log.Debug("Current sequencer active.", "index", index, "url", ep) + p.log.Debug("Current sequencer active.", "offset", p.rollupIndex, "url", ep) return nil } else { - p.log.Info("Current sequencer inactive, closing connection and trying next.", "index", index, "url", ep) - p.currentRollupClient.Close() + p.log.Info("Current sequencer inactive, closing connection and trying next.", "offset", p.rollupIndex, "url", ep) } if err := p.dialNextSequencer(ctx); err != nil { return fmt.Errorf("dialing next sequencer: %w", err) @@ -131,13 +136,14 @@ func (p *ActiveL2RollupProvider) dialNextSequencer(ctx context.Context) error { cctx, cancel := context.WithTimeout(ctx, p.networkTimeout) defer cancel() - p.currentIndex = (p.currentIndex + 1) % p.numEndpoints() - ep := p.rollupUrls[p.currentIndex] - p.log.Info("Dialing next sequencer.", "index", p.currentIndex, "url", ep) + p.rollupIndex = (p.rollupIndex + 1) % p.numEndpoints() + ep := p.rollupUrls[p.rollupIndex] + p.log.Info("Dialing next sequencer.", "index", p.rollupIndex, "url", ep) rollupClient, err := p.rollupDialer(cctx, p.networkTimeout, p.log, ep) if err != nil { return fmt.Errorf("dialing rollup client: %w", err) } + p.currentRollupClient.Close() p.currentRollupClient = rollupClient return nil } From c497bd36ed424e9879a78f65d8a90c9062a89a45 Mon Sep 17 00:00:00 2001 From: EvanJRichard Date: Mon, 18 Dec 2023 11:05:44 -0500 Subject: [PATCH 19/41] op-service: address review comments through adding more tests, and moving "nil client" behavior from client getter to constructor. --- op-service/dial/active_l2_provider.go | 28 +++- op-service/dial/active_l2_provider_test.go | 163 +++++++++++++++++---- op-service/dial/active_rollup_provider.go | 34 +++-- 3 files changed, 173 insertions(+), 52 deletions(-) diff --git a/op-service/dial/active_l2_provider.go b/op-service/dial/active_l2_provider.go index b50ca388d16b..c9c3c924d171 100644 --- a/op-service/dial/active_l2_provider.go +++ b/op-service/dial/active_l2_provider.go @@ -13,6 +13,10 @@ const DefaultActiveSequencerFollowerCheckDuration = 2 * DefaultDialTimeout type ethDialer func(ctx context.Context, timeout time.Duration, log log.Logger, url string) (EthClientInterface, error) +// ActiveL2EndpointProvider is an interface for providing a RollupClient and l2 eth client +// It manages the lifecycle of the RollupClient and eth client for callers +// It does this by failing over down the list of rollupUrls if the current one is inactive or broken + type ActiveL2EndpointProvider struct { ActiveL2RollupProvider currentEthClient EthClientInterface @@ -21,6 +25,9 @@ type ActiveL2EndpointProvider struct { ethUrls []string } +// NewActiveL2EndpointProvider creates a new ActiveL2EndpointProvider +// the checkDuration is the duration between checks to see if the current rollup client is active +// provide a checkDuration of 0 to check every time func NewActiveL2EndpointProvider(ctx context.Context, ethUrls, rollupUrls []string, checkDuration time.Duration, @@ -58,18 +65,25 @@ func newActiveL2EndpointProvider( if err != nil { return nil, err } - ep := &ActiveL2EndpointProvider{ + p := &ActiveL2EndpointProvider{ ActiveL2RollupProvider: *rollupProvider, ethDialer: ethDialer, ethUrls: ethUrls, } cctx, cancel := context.WithTimeout(ctx, networkTimeout) defer cancel() - _, err = ep.EthClient(cctx) + p.ethClientIndex = p.rollupIndex + ep := p.ethUrls[p.ethClientIndex] + ethClient, err := p.ethDialer(cctx, p.networkTimeout, p.log, ep) if err != nil { return nil, fmt.Errorf("dialing eth client: %w", err) } - return ep, nil + p.currentEthClient = ethClient + _, err = p.EthClient(cctx) + if err != nil { + return nil, fmt.Errorf("dialing eth client: %w", err) + } + return p, nil } func (p *ActiveL2EndpointProvider) EthClient(ctx context.Context) (EthClientInterface, error) { @@ -79,8 +93,8 @@ func (p *ActiveL2EndpointProvider) EthClient(ctx context.Context) (EthClientInte if err != nil { return nil, err } - if p.ethClientIndex != p.rollupIndex || p.currentEthClient == nil { - // we changed sequencers (or the provider was just created), dial a new EthClient + if p.ethClientIndex != p.rollupIndex { + // we changed sequencers, dial a new EthClient cctx, cancel := context.WithTimeout(ctx, p.networkTimeout) defer cancel() p.ethClientIndex = p.rollupIndex @@ -90,9 +104,7 @@ func (p *ActiveL2EndpointProvider) EthClient(ctx context.Context) (EthClientInte if err != nil { return nil, fmt.Errorf("dialing eth client: %w", err) } - if p.currentEthClient != nil { - p.currentEthClient.Close() - } + p.currentEthClient.Close() p.currentEthClient = ethClient } return p.currentEthClient, nil diff --git a/op-service/dial/active_l2_provider_test.go b/op-service/dial/active_l2_provider_test.go index 13dfef92c74d..bcf0edc7910f 100644 --- a/op-service/dial/active_l2_provider_test.go +++ b/op-service/dial/active_l2_provider_test.go @@ -116,8 +116,7 @@ func TestRollupProvider_FailoverOnInactiveSequencer(t *testing.T) { primarySequencer.ExpectSequencerActive(true, nil) // respond true once on creation primarySequencer.ExpectSequencerActive(true, nil) // respond true again when the test calls `RollupClient()` the first time - shortCheckDuration := 1 * time.Microsecond - rollupProvider, err := ept.newActiveL2RollupProvider(shortCheckDuration) + rollupProvider, err := ept.newActiveL2RollupProvider(0) require.NoError(t, err) firstSequencerUsed, err := rollupProvider.RollupClient(context.Background()) @@ -143,8 +142,7 @@ func TestEndpointProvider_FailoverOnInactiveSequencer(t *testing.T) { primarySequencer.ExpectSequencerActive(true, nil) // primary sequencer gets hit twice on creation: implicit call of `EthClient()` primarySequencer.ExpectSequencerActive(true, nil) // respond true again when the test calls `EthClient()` the first time - shortCheckDuration := 1 * time.Microsecond - activeProvider, err := ept.newActiveL2EndpointProvider(shortCheckDuration) + activeProvider, err := ept.newActiveL2EndpointProvider(0) require.NoError(t, err) firstSequencerUsed, err := activeProvider.EthClient(context.Background()) @@ -169,8 +167,7 @@ func TestRollupProvider_FailoverOnErroredSequencer(t *testing.T) { primarySequencer.ExpectSequencerActive(true, nil) // respond true once on creation primarySequencer.ExpectSequencerActive(true, nil) // respond true again when the test calls `RollupClient()` the first time - shortCheckDuration := 1 * time.Microsecond - rollupProvider, err := ept.newActiveL2RollupProvider(shortCheckDuration) + rollupProvider, err := ept.newActiveL2RollupProvider(0) require.NoError(t, err) firstSequencerUsed, err := rollupProvider.RollupClient(context.Background()) @@ -195,8 +192,7 @@ func TestEndpointProvider_FailoverOnErroredSequencer(t *testing.T) { primarySequencer.ExpectSequencerActive(true, nil) // primary sequencer gets hit once on creation: embedded call of `RollupClient()` primarySequencer.ExpectSequencerActive(true, nil) // primary sequencer gets hit twice on creation: implicit call of `EthClient()` - shortCheckDuration := 1 * time.Microsecond - activeProvider, err := ept.newActiveL2EndpointProvider(shortCheckDuration) + activeProvider, err := ept.newActiveL2EndpointProvider(0) require.NoError(t, err) primarySequencer.ExpectSequencerActive(true, nil) // respond true again when the test calls `EthClient()` the first time @@ -222,8 +218,7 @@ func TestRollupProvider_NoExtraCheckOnActiveSequencer(t *testing.T) { primarySequencer.ExpectSequencerActive(true, nil) // default test provider, which always checks, checks Active on creation - shortCheckDuration := 1 * time.Microsecond - rollupProvider, err := ept.newActiveL2RollupProvider(shortCheckDuration) + rollupProvider, err := ept.newActiveL2RollupProvider(0) require.NoError(t, err) require.Same(t, primarySequencer, rollupProvider.currentRollupClient) @@ -243,8 +238,7 @@ func TestEndpointProvider_NoExtraCheckOnActiveSequencer(t *testing.T) { primarySequencer.ExpectSequencerActive(true, nil) // default test provider, which always checks, checks Active twice on creation (once for internal RollupClient() call) primarySequencer.ExpectSequencerActive(true, nil) // default test provider, which always checks, checks Active twice on creation (once for internal EthClient() call) - shortCheckDuration := 1 * time.Microsecond - endpointProvider, err := ept.newActiveL2EndpointProvider(shortCheckDuration) + endpointProvider, err := ept.newActiveL2EndpointProvider(0) require.NoError(t, err) require.Same(t, ept.ethClients[0], endpointProvider.currentEthClient) @@ -263,8 +257,7 @@ func TestRollupProvider_FailoverAndReturn(t *testing.T) { // Primary initially active primarySequencer.ExpectSequencerActive(true, nil) - shortCheckDuration := 1 * time.Microsecond - rollupProvider, err := ept.newActiveL2RollupProvider(shortCheckDuration) + rollupProvider, err := ept.newActiveL2RollupProvider(0) require.NoError(t, err) // Primary becomes inactive, secondary active @@ -297,8 +290,7 @@ func TestEndpointProvider_FailoverAndReturn(t *testing.T) { // Primary initially active primarySequencer.ExpectSequencerActive(true, nil) primarySequencer.ExpectSequencerActive(true, nil) // see comment in other tests about why we expect this twice - shortCheckDuration := 1 * time.Microsecond - endpointProvider, err := ept.newActiveL2EndpointProvider(shortCheckDuration) + endpointProvider, err := ept.newActiveL2EndpointProvider(0) require.NoError(t, err) // Primary becomes inactive, secondary active @@ -333,8 +325,7 @@ func TestRollupProvider_InitialActiveSequencerSelection(t *testing.T) { // Primary active at creation primarySequencer.ExpectSequencerActive(true, nil) - shortCheckDuration := 1 * time.Microsecond - rollupProvider, err := ept.newActiveL2RollupProvider(shortCheckDuration) + rollupProvider, err := ept.newActiveL2RollupProvider(0) require.NoError(t, err) // Check immediately after creation without additional Active check @@ -351,14 +342,47 @@ func TestEndpointProvider_InitialActiveSequencerSelection(t *testing.T) { primarySequencer.ExpectSequencerActive(true, nil) primarySequencer.ExpectSequencerActive(true, nil) // see comment in other tests about why we expect this twice - shortCheckDuration := 1 * time.Microsecond - rollupProvider, err := ept.newActiveL2EndpointProvider(shortCheckDuration) + rollupProvider, err := ept.newActiveL2EndpointProvider(0) require.NoError(t, err) // Check immediately after creation without additional Active check require.Same(t, primarySequencer, rollupProvider.currentRollupClient) } +// TestRollupProvider_SelectSecondSequencerIfFirstInactiveAtCreation verifies that if the first sequencer +// is inactive at the time of ActiveL2RollupProvider creation, the second active sequencer is chosen. +func TestRollupProvider_SelectSecondSequencerIfFirstInactiveAtCreation(t *testing.T) { + ept := setupEndpointProviderTest(t, 2) + + // First sequencer is inactive, second sequencer is active + ept.rollupClients[0].ExpectSequencerActive(false, nil) + ept.rollupClients[0].ExpectClose() + ept.rollupClients[1].ExpectSequencerActive(true, nil) + + rollupProvider, err := ept.newActiveL2RollupProvider(0) + require.NoError(t, err) + + require.Same(t, ept.rollupClients[1], rollupProvider.currentRollupClient) +} + +// TestEndpointProvider_SelectSecondSequencerIfFirstInactiveAtCreation verifies that if the first sequencer +// is inactive at the time of ActiveL2EndpointProvider creation, the second active sequencer is chosen. +func TestEndpointProvider_SelectSecondSequencerIfFirstInactiveAtCreation(t *testing.T) { + ept := setupEndpointProviderTest(t, 2) + + // First sequencer is inactive, second sequencer is active + ept.rollupClients[0].ExpectSequencerActive(false, nil) + ept.rollupClients[0].ExpectSequencerActive(false, nil) // see comment in other tests about why we expect this twice + ept.rollupClients[0].ExpectClose() + ept.rollupClients[1].ExpectSequencerActive(true, nil) + ept.rollupClients[1].ExpectSequencerActive(true, nil) // see comment in other tests about why we expect this twice + + endpointProvider, err := ept.newActiveL2EndpointProvider(0) + require.NoError(t, err) + + require.Same(t, ept.ethClients[1], endpointProvider.currentEthClient) +} + // TestRollupProvider_FailOnAllInactiveSequencers verifies that the ActiveL2RollupProvider // fails to be created when all sequencers are inactive. func TestRollupProvider_FailOnAllInactiveSequencers(t *testing.T) { @@ -370,8 +394,7 @@ func TestRollupProvider_FailOnAllInactiveSequencers(t *testing.T) { sequencer.ExpectClose() } - shortCheckDuration := 1 * time.Microsecond - _, err := ept.newActiveL2RollupProvider(shortCheckDuration) + _, err := ept.newActiveL2RollupProvider(0) require.Error(t, err) // Expect an error as all sequencers are inactive } @@ -387,8 +410,7 @@ func TestEndpointProvider_FailOnAllInactiveSequencers(t *testing.T) { sequencer.ExpectClose() } - shortCheckDuration := 1 * time.Microsecond - _, err := ept.newActiveL2EndpointProvider(shortCheckDuration) + _, err := ept.newActiveL2EndpointProvider(0) require.Error(t, err) // Expect an error as all sequencers are inactive } @@ -403,8 +425,7 @@ func TestRollupProvider_FailOnAllErroredSequencers(t *testing.T) { sequencer.ExpectClose() } - shortCheckDuration := 1 * time.Microsecond - _, err := ept.newActiveL2RollupProvider(shortCheckDuration) + _, err := ept.newActiveL2RollupProvider(0) require.Error(t, err) // Expect an error as all sequencers are inactive } @@ -420,8 +441,7 @@ func TestEndpointProvider_FailOnAllErroredSequencers(t *testing.T) { sequencer.ExpectClose() } - shortCheckDuration := 1 * time.Microsecond - _, err := ept.newActiveL2EndpointProvider(shortCheckDuration) + _, err := ept.newActiveL2EndpointProvider(0) require.Error(t, err) // Expect an error as all sequencers are inactive } @@ -466,3 +486,90 @@ func TestEndpointProvider_LongCheckDuration(t *testing.T) { require.NoError(t, err) require.Same(t, ept.ethClients[0], secondEthClient) } + +// TestRollupProvider_ErrorWhenAllSequencersInactive verifies that RollupClient() returns an error +// if all sequencers become inactive after the provider is successfully created. +func TestRollupProvider_ErrorWhenAllSequencersInactive(t *testing.T) { + ept := setupEndpointProviderTest(t, 2) + ept.rollupClients[0].ExpectSequencerActive(true, nil) // Main sequencer initially active + + rollupProvider, err := ept.newActiveL2RollupProvider(0) + require.NoError(t, err) + + // All sequencers become inactive + for _, sequencer := range ept.rollupClients { + sequencer.ExpectSequencerActive(false, nil) + sequencer.ExpectClose() + } + + _, err = rollupProvider.RollupClient(context.Background()) + require.Error(t, err) // Expect an error as all sequencers are inactive +} + +// TestEndpointProvider_ErrorWhenAllSequencersInactive verifies that EthClient() returns an error +// if all sequencers become inactive after the provider is successfully created. +func TestEndpointProvider_ErrorWhenAllSequencersInactive(t *testing.T) { + ept := setupEndpointProviderTest(t, 2) + ept.rollupClients[0].ExpectSequencerActive(true, nil) // Main sequencer initially active + ept.rollupClients[0].ExpectSequencerActive(true, nil) // Main sequencer initially active (double check due to embedded call of `EthClient()`) + + endpointProvider, err := ept.newActiveL2EndpointProvider(0) + require.NoError(t, err) + + // All sequencers become inactive + for _, sequencer := range ept.rollupClients { + sequencer.ExpectSequencerActive(false, nil) + sequencer.ExpectClose() + } + + _, err = endpointProvider.EthClient(context.Background()) + require.Error(t, err) // Expect an error as all sequencers are inactive +} + +// TestRollupProvider_ReturnsSameSequencerOnInactiveWithLongCheckDuration verifies that the ActiveL2RollupProvider +// still returns the same sequencer across calls even if it becomes inactive, due to a long check duration. +func TestRollupProvider_ReturnsSameSequencerOnInactiveWithLongCheckDuration(t *testing.T) { + ept := setupEndpointProviderTest(t, 2) + primarySequencer := ept.rollupClients[0] + + longCheckDuration := 1 * time.Hour + primarySequencer.ExpectSequencerActive(true, nil) // Active on creation + + rollupProvider, err := ept.newActiveL2RollupProvider(longCheckDuration) + require.NoError(t, err) + + // Primary sequencer becomes inactive, but the provider won't check immediately due to longCheckDuration + primarySequencer.ExpectSequencerActive(false, nil) + + firstSequencerUsed, err := rollupProvider.RollupClient(context.Background()) + require.NoError(t, err) + require.Same(t, primarySequencer, firstSequencerUsed) + + secondSequencerUsed, err := rollupProvider.RollupClient(context.Background()) + require.NoError(t, err) + require.Same(t, primarySequencer, secondSequencerUsed) +} + +// TestEndpointProvider_ReturnsSameSequencerOnInactiveWithLongCheckDuration verifies that the ActiveL2EndpointProvider +// still returns the same sequencer across calls even if it becomes inactive, due to a long check duration. +func TestEndpointProvider_ReturnsSameSequencerOnInactiveWithLongCheckDuration(t *testing.T) { + ept := setupEndpointProviderTest(t, 2) + primarySequencer := ept.rollupClients[0] + + longCheckDuration := 1 * time.Hour + primarySequencer.ExpectSequencerActive(true, nil) // Active on creation + + endpointProvider, err := ept.newActiveL2EndpointProvider(longCheckDuration) + require.NoError(t, err) + + // Primary sequencer becomes inactive, but the provider won't check immediately due to longCheckDuration + primarySequencer.ExpectSequencerActive(false, nil) + + firstEthClientUsed, err := endpointProvider.EthClient(context.Background()) + require.NoError(t, err) + require.Same(t, ept.ethClients[0], firstEthClientUsed) + + secondEthClientUsed, err := endpointProvider.EthClient(context.Background()) + require.NoError(t, err) + require.Same(t, ept.ethClients[0], secondEthClientUsed) +} diff --git a/op-service/dial/active_rollup_provider.go b/op-service/dial/active_rollup_provider.go index 7f0ecbab18e2..9aceaf2a7fb7 100644 --- a/op-service/dial/active_rollup_provider.go +++ b/op-service/dial/active_rollup_provider.go @@ -12,6 +12,9 @@ import ( type rollupDialer func(ctx context.Context, timeout time.Duration, log log.Logger, url string) (RollupClientInterface, error) +// ActiveL2EndpointProvider is an interface for providing a RollupClient +// It manages the lifecycle of the RollupClient for callers +// It does this by failing over down the list of rollupUrls if the current one is inactive or broken type ActiveL2RollupProvider struct { checkDuration time.Duration networkTimeout time.Duration @@ -26,6 +29,9 @@ type ActiveL2RollupProvider struct { clientLock *sync.Mutex } +// NewActiveL2RollupProvider creates a new ActiveL2RollupProvider +// the checkDuration is the duration between checks to see if the current rollup client is active +// provide a checkDuration of 0 to check every time func NewActiveL2RollupProvider( ctx context.Context, rollupUrls []string, @@ -51,7 +57,7 @@ func newActiveL2RollupProvider( if len(rollupUrls) == 0 { return nil, errors.New("empty rollup urls list") } - rp := &ActiveL2RollupProvider{ + p := &ActiveL2RollupProvider{ checkDuration: checkDuration, networkTimeout: networkTimeout, log: logger, @@ -61,26 +67,22 @@ func newActiveL2RollupProvider( } cctx, cancel := context.WithTimeout(ctx, networkTimeout) defer cancel() - _, err := rp.RollupClient(cctx) + ep := p.rollupUrls[p.rollupIndex] + rollupClient, err := p.rollupDialer(cctx, p.networkTimeout, p.log, ep) + if err != nil { + return nil, fmt.Errorf("dialing rollup client: %w", err) + } + p.currentRollupClient = rollupClient + _, err = p.RollupClient(cctx) if err != nil { return nil, fmt.Errorf("dialing rollup client: %w", err) } - return rp, nil + return p, nil } func (p *ActiveL2RollupProvider) RollupClient(ctx context.Context) (RollupClientInterface, error) { p.clientLock.Lock() defer p.clientLock.Unlock() - if p.currentRollupClient == nil { - cctx, cancel := context.WithTimeout(ctx, p.networkTimeout) - defer cancel() - ep := p.rollupUrls[p.rollupIndex] - rollupClient, err := p.rollupDialer(cctx, p.networkTimeout, p.log, ep) - if err != nil { - return nil, fmt.Errorf("dialing rollup client: %w", err) - } - p.currentRollupClient = rollupClient - } err := p.ensureActiveEndpoint(ctx) if err != nil { return nil, err @@ -108,12 +110,12 @@ func (p *ActiveL2RollupProvider) findActiveEndpoints(ctx context.Context) error active, err := p.checkCurrentSequencer(ctx) ep := p.rollupUrls[p.rollupIndex] if err != nil { - p.log.Warn("Error querying active sequencer, closing connection and trying next.", "err", err, "offset", p.rollupIndex, "url", ep) + p.log.Warn("Error querying active sequencer, closing connection and trying next.", "err", err, "index", p.rollupIndex, "url", ep) } else if active { - p.log.Debug("Current sequencer active.", "offset", p.rollupIndex, "url", ep) + p.log.Debug("Current sequencer active.", "index", p.rollupIndex, "url", ep) return nil } else { - p.log.Info("Current sequencer inactive, closing connection and trying next.", "offset", p.rollupIndex, "url", ep) + p.log.Info("Current sequencer inactive, closing connection and trying next.", "index", p.rollupIndex, "url", ep) } if err := p.dialNextSequencer(ctx); err != nil { return fmt.Errorf("dialing next sequencer: %w", err) From 2b2a18536389aa4e26b11213c2d3c0ff52eda882 Mon Sep 17 00:00:00 2001 From: EvanJRichard Date: Mon, 18 Dec 2023 11:06:56 -0500 Subject: [PATCH 20/41] op-service: minor error message change in active endpoint providers. --- op-service/dial/active_l2_provider.go | 2 +- op-service/dial/active_rollup_provider.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/op-service/dial/active_l2_provider.go b/op-service/dial/active_l2_provider.go index c9c3c924d171..b820bb0f1596 100644 --- a/op-service/dial/active_l2_provider.go +++ b/op-service/dial/active_l2_provider.go @@ -81,7 +81,7 @@ func newActiveL2EndpointProvider( p.currentEthClient = ethClient _, err = p.EthClient(cctx) if err != nil { - return nil, fmt.Errorf("dialing eth client: %w", err) + return nil, fmt.Errorf("setting provider eth client: %w", err) } return p, nil } diff --git a/op-service/dial/active_rollup_provider.go b/op-service/dial/active_rollup_provider.go index 9aceaf2a7fb7..9bd104b44130 100644 --- a/op-service/dial/active_rollup_provider.go +++ b/op-service/dial/active_rollup_provider.go @@ -75,7 +75,7 @@ func newActiveL2RollupProvider( p.currentRollupClient = rollupClient _, err = p.RollupClient(cctx) if err != nil { - return nil, fmt.Errorf("dialing rollup client: %w", err) + return nil, fmt.Errorf("setting provider rollup client: %w", err) } return p, nil } From 00fc6c8b26ead3d9d0420dca0685b7ec9ff9aad4 Mon Sep 17 00:00:00 2001 From: Evan Richard <5766842+EvanJRichard@users.noreply.github.com> Date: Mon, 18 Dec 2023 11:14:12 -0500 Subject: [PATCH 21/41] Update op-service/dial/active_l2_provider.go Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- op-service/dial/active_l2_provider.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/op-service/dial/active_l2_provider.go b/op-service/dial/active_l2_provider.go index b820bb0f1596..4c2774f76314 100644 --- a/op-service/dial/active_l2_provider.go +++ b/op-service/dial/active_l2_provider.go @@ -55,10 +55,10 @@ func newActiveL2EndpointProvider( rollupDialer rollupDialer, ) (*ActiveL2EndpointProvider, error) { if len(rollupUrls) == 0 { - return nil, errors.New("empty rollup urls list") + return nil, errors.New("empty rollup urls list, expected at least one URL") } if len(ethUrls) != len(rollupUrls) { - return nil, errors.New("number of eth urls and rollup urls mismatch") + return nil, errors.New(fmt.Sprintf("number of eth urls (%d) and rollup urls (%d) mismatch", len(ethUrls), len(rollupUrls))) } rollupProvider, err := newActiveL2RollupProvider(ctx, rollupUrls, checkDuration, networkTimeout, logger, rollupDialer) From 987204a2784a4089ca5c7315210a83a23fccb252 Mon Sep 17 00:00:00 2001 From: EvanJRichard Date: Mon, 18 Dec 2023 11:20:09 -0500 Subject: [PATCH 22/41] op-service: obey linter in rabbit-provided error message change. --- op-service/dial/active_l2_provider.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/op-service/dial/active_l2_provider.go b/op-service/dial/active_l2_provider.go index 4c2774f76314..767d07ac004f 100644 --- a/op-service/dial/active_l2_provider.go +++ b/op-service/dial/active_l2_provider.go @@ -58,7 +58,7 @@ func newActiveL2EndpointProvider( return nil, errors.New("empty rollup urls list, expected at least one URL") } if len(ethUrls) != len(rollupUrls) { - return nil, errors.New(fmt.Sprintf("number of eth urls (%d) and rollup urls (%d) mismatch", len(ethUrls), len(rollupUrls))) + return nil, fmt.Errorf("number of eth urls (%d) and rollup urls (%d) mismatch", len(ethUrls), len(rollupUrls)) } rollupProvider, err := newActiveL2RollupProvider(ctx, rollupUrls, checkDuration, networkTimeout, logger, rollupDialer) From 15514b2df68c862012ecdf0b0058cba2ae9eede3 Mon Sep 17 00:00:00 2001 From: Evan Richard <5766842+EvanJRichard@users.noreply.github.com> Date: Mon, 18 Dec 2023 12:03:19 -0500 Subject: [PATCH 23/41] Update op-service/dial/active_l2_provider.go Co-authored-by: Sebastian Stammler --- op-service/dial/active_l2_provider.go | 1 - 1 file changed, 1 deletion(-) diff --git a/op-service/dial/active_l2_provider.go b/op-service/dial/active_l2_provider.go index 767d07ac004f..a2e646da1871 100644 --- a/op-service/dial/active_l2_provider.go +++ b/op-service/dial/active_l2_provider.go @@ -16,7 +16,6 @@ type ethDialer func(ctx context.Context, timeout time.Duration, log log.Logger, // ActiveL2EndpointProvider is an interface for providing a RollupClient and l2 eth client // It manages the lifecycle of the RollupClient and eth client for callers // It does this by failing over down the list of rollupUrls if the current one is inactive or broken - type ActiveL2EndpointProvider struct { ActiveL2RollupProvider currentEthClient EthClientInterface From 1514fb9bec538565b5b29a6e4aecd4048de3a8e0 Mon Sep 17 00:00:00 2001 From: EvanJRichard Date: Mon, 18 Dec 2023 12:11:04 -0500 Subject: [PATCH 24/41] op-service active L2 provider tests: assertAllExpectations after most tests. --- op-service/dial/active_l2_provider_test.go | 36 ++++++++++++++++++++-- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/op-service/dial/active_l2_provider_test.go b/op-service/dial/active_l2_provider_test.go index bcf0edc7910f..855b63fa7bb2 100644 --- a/op-service/dial/active_l2_provider_test.go +++ b/op-service/dial/active_l2_provider_test.go @@ -107,6 +107,15 @@ func (et *endpointProviderTest) newActiveL2EndpointProvider(checkDuration time.D ) } +func (et *endpointProviderTest) assertAllExpectations(t *testing.T) { + for _, sequencer := range et.rollupClients { + sequencer.AssertExpectations(t) + } + for _, ethClient := range et.ethClients { + ethClient.AssertExpectations(t) + } +} + // TestRollupProvider_FailoverOnInactiveSequencer verifies that the ActiveL2RollupProvider // will switch to the next provider if the current one becomes inactive. func TestRollupProvider_FailoverOnInactiveSequencer(t *testing.T) { @@ -129,6 +138,7 @@ func TestRollupProvider_FailoverOnInactiveSequencer(t *testing.T) { secondSequencerUsed, err := rollupProvider.RollupClient(context.Background()) require.NoError(t, err) require.Same(t, secondarySequencer, secondSequencerUsed) + ept.assertAllExpectations(t) } // TestEndpointProvider_FailoverOnInactiveSequencer verifies that the ActiveL2EndpointProvider @@ -156,6 +166,7 @@ func TestEndpointProvider_FailoverOnInactiveSequencer(t *testing.T) { secondSequencerUsed, err := activeProvider.EthClient(context.Background()) require.NoError(t, err) require.Same(t, ept.ethClients[1], secondSequencerUsed) + ept.assertAllExpectations(t) } // TestRollupProvider_FailoverOnErroredSequencer verifies that the ActiveL2RollupProvider @@ -180,6 +191,7 @@ func TestRollupProvider_FailoverOnErroredSequencer(t *testing.T) { secondSequencerUsed, err := rollupProvider.RollupClient(context.Background()) require.NoError(t, err) require.Same(t, secondarySequencer, secondSequencerUsed) + ept.assertAllExpectations(t) } // TestEndpointProvider_FailoverOnErroredSequencer verifies that the ActiveL2EndpointProvider @@ -208,6 +220,7 @@ func TestEndpointProvider_FailoverOnErroredSequencer(t *testing.T) { secondSequencerUsed, err := activeProvider.EthClient(context.Background()) require.NoError(t, err) require.Same(t, secondaryEthClient, secondSequencerUsed) + ept.assertAllExpectations(t) } // TestRollupProvider_NoExtraCheckOnActiveSequencer verifies that the ActiveL2RollupProvider @@ -227,6 +240,7 @@ func TestRollupProvider_NoExtraCheckOnActiveSequencer(t *testing.T) { firstSequencerUsed, err := rollupProvider.RollupClient(context.Background()) require.NoError(t, err) require.Same(t, primarySequencer, firstSequencerUsed) + ept.assertAllExpectations(t) } // TestEndpointProvider_NoExtraCheckOnActiveSequencer verifies that the ActiveL2EndpointProvider @@ -247,6 +261,7 @@ func TestEndpointProvider_NoExtraCheckOnActiveSequencer(t *testing.T) { firstEthClientUsed, err := endpointProvider.EthClient(context.Background()) require.NoError(t, err) require.Same(t, ept.ethClients[0], firstEthClientUsed) + ept.assertAllExpectations(t) } // TestRollupProvider_FailoverAndReturn verifies the ActiveL2RollupProvider's ability to @@ -279,6 +294,7 @@ func TestRollupProvider_FailoverAndReturn(t *testing.T) { thirdSequencerUsed, err := rollupProvider.RollupClient(context.Background()) require.NoError(t, err) require.Same(t, primarySequencer, thirdSequencerUsed) + ept.assertAllExpectations(t) } // TestEndpointProvider_FailoverAndReturn verifies the ActiveL2EndpointProvider's ability to @@ -314,6 +330,7 @@ func TestEndpointProvider_FailoverAndReturn(t *testing.T) { thirdSequencerUsed, err := endpointProvider.EthClient(context.Background()) require.NoError(t, err) require.Same(t, ept.ethClients[0], thirdSequencerUsed) + ept.assertAllExpectations(t) } // TestRollupProvider_InitialActiveSequencerSelection verifies that the ActiveL2RollupProvider @@ -330,6 +347,7 @@ func TestRollupProvider_InitialActiveSequencerSelection(t *testing.T) { // Check immediately after creation without additional Active check require.Same(t, primarySequencer, rollupProvider.currentRollupClient) + ept.assertAllExpectations(t) } // TestEndpointProvider_InitialActiveSequencerSelection verifies that the ActiveL2EndpointProvider @@ -347,6 +365,7 @@ func TestEndpointProvider_InitialActiveSequencerSelection(t *testing.T) { // Check immediately after creation without additional Active check require.Same(t, primarySequencer, rollupProvider.currentRollupClient) + ept.assertAllExpectations(t) } // TestRollupProvider_SelectSecondSequencerIfFirstInactiveAtCreation verifies that if the first sequencer @@ -363,6 +382,7 @@ func TestRollupProvider_SelectSecondSequencerIfFirstInactiveAtCreation(t *testin require.NoError(t, err) require.Same(t, ept.rollupClients[1], rollupProvider.currentRollupClient) + ept.assertAllExpectations(t) } // TestEndpointProvider_SelectSecondSequencerIfFirstInactiveAtCreation verifies that if the first sequencer @@ -372,7 +392,6 @@ func TestEndpointProvider_SelectSecondSequencerIfFirstInactiveAtCreation(t *test // First sequencer is inactive, second sequencer is active ept.rollupClients[0].ExpectSequencerActive(false, nil) - ept.rollupClients[0].ExpectSequencerActive(false, nil) // see comment in other tests about why we expect this twice ept.rollupClients[0].ExpectClose() ept.rollupClients[1].ExpectSequencerActive(true, nil) ept.rollupClients[1].ExpectSequencerActive(true, nil) // see comment in other tests about why we expect this twice @@ -381,6 +400,7 @@ func TestEndpointProvider_SelectSecondSequencerIfFirstInactiveAtCreation(t *test require.NoError(t, err) require.Same(t, ept.ethClients[1], endpointProvider.currentEthClient) + ept.assertAllExpectations(t) } // TestRollupProvider_FailOnAllInactiveSequencers verifies that the ActiveL2RollupProvider @@ -396,13 +416,13 @@ func TestRollupProvider_FailOnAllInactiveSequencers(t *testing.T) { _, err := ept.newActiveL2RollupProvider(0) require.Error(t, err) // Expect an error as all sequencers are inactive + ept.assertAllExpectations(t) } // TestEndpointProvider_FailOnAllInactiveSequencers verifies that the ActiveL2EndpointProvider // fails to be created when all sequencers are inactive. func TestEndpointProvider_FailOnAllInactiveSequencers(t *testing.T) { ept := setupEndpointProviderTest(t, 2) - ept.rollupClients[0].ExpectSequencerActive(false, nil) // see comment in other tests about why we expect this an extra time // All sequencers are inactive for _, sequencer := range ept.rollupClients { @@ -412,6 +432,7 @@ func TestEndpointProvider_FailOnAllInactiveSequencers(t *testing.T) { _, err := ept.newActiveL2EndpointProvider(0) require.Error(t, err) // Expect an error as all sequencers are inactive + ept.assertAllExpectations(t) } // TestRollupProvider_FailOnAllErroredSequencers verifies that the ActiveL2RollupProvider @@ -427,13 +448,13 @@ func TestRollupProvider_FailOnAllErroredSequencers(t *testing.T) { _, err := ept.newActiveL2RollupProvider(0) require.Error(t, err) // Expect an error as all sequencers are inactive + ept.assertAllExpectations(t) } // TestEndpointProvider_FailOnAllErroredSequencers verifies that the ActiveL2EndpointProvider // fails to create when all sequencers return an error. func TestEndpointProvider_FailOnAllErroredSequencers(t *testing.T) { ept := setupEndpointProviderTest(t, 2) - ept.rollupClients[0].ExpectSequencerActive(true, fmt.Errorf("a test error")) // see comment in other tests about why we expect this an extra time // All sequencers are inactive for _, sequencer := range ept.rollupClients { @@ -443,6 +464,7 @@ func TestEndpointProvider_FailOnAllErroredSequencers(t *testing.T) { _, err := ept.newActiveL2EndpointProvider(0) require.Error(t, err) // Expect an error as all sequencers are inactive + ept.assertAllExpectations(t) } // TestRollupProvider_LongCheckDuration verifies the behavior of ActiveL2RollupProvider with a long check duration. @@ -464,6 +486,7 @@ func TestRollupProvider_LongCheckDuration(t *testing.T) { secondSequencerUsed, err := rollupProvider.RollupClient(context.Background()) require.NoError(t, err) require.Same(t, primarySequencer, secondSequencerUsed) + ept.assertAllExpectations(t) } // TestEndpointProvider_LongCheckDuration verifies the behavior of ActiveL2EndpointProvider with a long check duration. @@ -485,6 +508,7 @@ func TestEndpointProvider_LongCheckDuration(t *testing.T) { secondEthClient, err := endpointProvider.EthClient(context.Background()) require.NoError(t, err) require.Same(t, ept.ethClients[0], secondEthClient) + ept.assertAllExpectations(t) } // TestRollupProvider_ErrorWhenAllSequencersInactive verifies that RollupClient() returns an error @@ -504,6 +528,7 @@ func TestRollupProvider_ErrorWhenAllSequencersInactive(t *testing.T) { _, err = rollupProvider.RollupClient(context.Background()) require.Error(t, err) // Expect an error as all sequencers are inactive + ept.assertAllExpectations(t) } // TestEndpointProvider_ErrorWhenAllSequencersInactive verifies that EthClient() returns an error @@ -524,6 +549,7 @@ func TestEndpointProvider_ErrorWhenAllSequencersInactive(t *testing.T) { _, err = endpointProvider.EthClient(context.Background()) require.Error(t, err) // Expect an error as all sequencers are inactive + ept.assertAllExpectations(t) } // TestRollupProvider_ReturnsSameSequencerOnInactiveWithLongCheckDuration verifies that the ActiveL2RollupProvider @@ -548,6 +574,8 @@ func TestRollupProvider_ReturnsSameSequencerOnInactiveWithLongCheckDuration(t *t secondSequencerUsed, err := rollupProvider.RollupClient(context.Background()) require.NoError(t, err) require.Same(t, primarySequencer, secondSequencerUsed) + // we do not assertAllExpectations here because we are expecting exactly that the provider _doesn't_ + // call `SequencerActive()` again, which would cause the expectations to fail. } // TestEndpointProvider_ReturnsSameSequencerOnInactiveWithLongCheckDuration verifies that the ActiveL2EndpointProvider @@ -572,4 +600,6 @@ func TestEndpointProvider_ReturnsSameSequencerOnInactiveWithLongCheckDuration(t secondEthClientUsed, err := endpointProvider.EthClient(context.Background()) require.NoError(t, err) require.Same(t, ept.ethClients[0], secondEthClientUsed) + // we do not assertAllExpectations here because we are expecting exactly that the provider _doesn't_ + // call `SequencerActive()` again, which would cause the expectations to fail. } From 3599b84abe0fe8cd51cac985ecd8fb239d9949ca Mon Sep 17 00:00:00 2001 From: EvanJRichard Date: Mon, 18 Dec 2023 12:36:03 -0500 Subject: [PATCH 25/41] op-service: more elegantly handle startup in active l2 providers, and improve testing. --- op-service/dial/active_l2_provider.go | 15 ++++------ op-service/dial/active_l2_provider_test.go | 35 ++++++++++++++++++++++ op-service/dial/active_rollup_provider.go | 30 ++++++++++++------- 3 files changed, 60 insertions(+), 20 deletions(-) diff --git a/op-service/dial/active_l2_provider.go b/op-service/dial/active_l2_provider.go index 767d07ac004f..39ad5e5d7c2b 100644 --- a/op-service/dial/active_l2_provider.go +++ b/op-service/dial/active_l2_provider.go @@ -72,13 +72,6 @@ func newActiveL2EndpointProvider( } cctx, cancel := context.WithTimeout(ctx, networkTimeout) defer cancel() - p.ethClientIndex = p.rollupIndex - ep := p.ethUrls[p.ethClientIndex] - ethClient, err := p.ethDialer(cctx, p.networkTimeout, p.log, ep) - if err != nil { - return nil, fmt.Errorf("dialing eth client: %w", err) - } - p.currentEthClient = ethClient _, err = p.EthClient(cctx) if err != nil { return nil, fmt.Errorf("setting provider eth client: %w", err) @@ -93,18 +86,20 @@ func (p *ActiveL2EndpointProvider) EthClient(ctx context.Context) (EthClientInte if err != nil { return nil, err } - if p.ethClientIndex != p.rollupIndex { + if p.ethClientIndex != p.rollupIndex || p.currentEthClient == nil { // we changed sequencers, dial a new EthClient cctx, cancel := context.WithTimeout(ctx, p.networkTimeout) defer cancel() p.ethClientIndex = p.rollupIndex ep := p.ethUrls[p.ethClientIndex] - log.Info("sequencer changed, dialing new eth client", "new_index", p.rollupIndex, "new_url", ep) + log.Info("sequencer changed (or ethClient was nil due to startup), dialing new eth client", "new_index", p.rollupIndex, "new_url", ep) ethClient, err := p.ethDialer(cctx, p.networkTimeout, p.log, ep) if err != nil { return nil, fmt.Errorf("dialing eth client: %w", err) } - p.currentEthClient.Close() + if p.currentEthClient != nil { + p.currentEthClient.Close() + } p.currentEthClient = ethClient } return p.currentEthClient, nil diff --git a/op-service/dial/active_l2_provider_test.go b/op-service/dial/active_l2_provider_test.go index 855b63fa7bb2..7b725826a778 100644 --- a/op-service/dial/active_l2_provider_test.go +++ b/op-service/dial/active_l2_provider_test.go @@ -403,6 +403,41 @@ func TestEndpointProvider_SelectSecondSequencerIfFirstInactiveAtCreation(t *test ept.assertAllExpectations(t) } +// TestRollupProvider_ConstructorErrorOnFirstSequencerOffline verifies that the ActiveL2RollupProvider +// constructor handles the case where the first sequencer (index 0) is offline at startup. +func TestRollupProvider_ConstructorErrorOnFirstSequencerOffline(t *testing.T) { + ept := setupEndpointProviderTest(t, 2) + + // First sequencer is dead, second sequencer is active + ept.rollupClients[0].ExpectSequencerActive(false, fmt.Errorf("I am offline")) + ept.rollupClients[0].ExpectClose() + ept.rollupClients[1].ExpectSequencerActive(true, nil) + + rollupProvider, err := ept.newActiveL2RollupProvider(0) + require.NoError(t, err) + + require.Same(t, ept.rollupClients[1], rollupProvider.currentRollupClient) + ept.assertAllExpectations(t) +} + +// TestEndpointProvider_ConstructorErrorOnFirstSequencerOffline verifies that the ActiveL2EndpointProvider +// constructor handles the case where the first sequencer (index 0) is offline at startup. +func TestEndpointProvider_ConstructorErrorOnFirstSequencerOffline(t *testing.T) { + ept := setupEndpointProviderTest(t, 2) + + // First sequencer is dead, second sequencer is active + ept.rollupClients[0].ExpectSequencerActive(false, fmt.Errorf("I am offline")) + ept.rollupClients[0].ExpectClose() + ept.rollupClients[1].ExpectSequencerActive(true, nil) + ept.rollupClients[1].ExpectSequencerActive(true, nil) // see comment in other tests about why we expect this twice + + endpointProvider, err := ept.newActiveL2EndpointProvider(0) + require.NoError(t, err) + + require.Same(t, ept.ethClients[1], endpointProvider.currentEthClient) + ept.assertAllExpectations(t) +} + // TestRollupProvider_FailOnAllInactiveSequencers verifies that the ActiveL2RollupProvider // fails to be created when all sequencers are inactive. func TestRollupProvider_FailOnAllInactiveSequencers(t *testing.T) { diff --git a/op-service/dial/active_rollup_provider.go b/op-service/dial/active_rollup_provider.go index 9bd104b44130..13c5207e6e1c 100644 --- a/op-service/dial/active_rollup_provider.go +++ b/op-service/dial/active_rollup_provider.go @@ -64,16 +64,11 @@ func newActiveL2RollupProvider( rollupUrls: rollupUrls, rollupDialer: dialer, clientLock: &sync.Mutex{}, + rollupIndex: -1, } cctx, cancel := context.WithTimeout(ctx, networkTimeout) defer cancel() - ep := p.rollupUrls[p.rollupIndex] - rollupClient, err := p.rollupDialer(cctx, p.networkTimeout, p.log, ep) - if err != nil { - return nil, fmt.Errorf("dialing rollup client: %w", err) - } - p.currentRollupClient = rollupClient - _, err = p.RollupClient(cctx) + _, err := p.RollupClient(cctx) if err != nil { return nil, fmt.Errorf("setting provider rollup client: %w", err) } @@ -105,10 +100,23 @@ func (p *ActiveL2RollupProvider) shouldCheck() bool { return time.Now().After(p.activeTimeout) } +func (p *ActiveL2RollupProvider) currentEndpoint() string { + if p.rollupIndex < 0 { + return "no endpoint set" + } + return p.rollupUrls[p.rollupIndex] +} + func (p *ActiveL2RollupProvider) findActiveEndpoints(ctx context.Context) error { + if p.currentRollupClient == nil { + err := p.dialNextSequencer(ctx) + if err != nil { + return fmt.Errorf("dialing first sequencer: %w", err) + } + } for range p.rollupUrls { active, err := p.checkCurrentSequencer(ctx) - ep := p.rollupUrls[p.rollupIndex] + ep := p.currentEndpoint() if err != nil { p.log.Warn("Error querying active sequencer, closing connection and trying next.", "err", err, "index", p.rollupIndex, "url", ep) } else if active { @@ -139,13 +147,15 @@ func (p *ActiveL2RollupProvider) dialNextSequencer(ctx context.Context) error { defer cancel() p.rollupIndex = (p.rollupIndex + 1) % p.numEndpoints() - ep := p.rollupUrls[p.rollupIndex] + ep := p.currentEndpoint() p.log.Info("Dialing next sequencer.", "index", p.rollupIndex, "url", ep) rollupClient, err := p.rollupDialer(cctx, p.networkTimeout, p.log, ep) if err != nil { return fmt.Errorf("dialing rollup client: %w", err) } - p.currentRollupClient.Close() + if p.currentRollupClient != nil { + p.currentRollupClient.Close() + } p.currentRollupClient = rollupClient return nil } From 314c94f854bf8765cf20590ea3e49cc7eccd9297 Mon Sep 17 00:00:00 2001 From: EvanJRichard Date: Mon, 18 Dec 2023 12:56:26 -0500 Subject: [PATCH 26/41] Change remaining longDurationTests to be able to use ept.assertAllExpectations. --- op-service/dial/active_l2_provider_test.go | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/op-service/dial/active_l2_provider_test.go b/op-service/dial/active_l2_provider_test.go index 7b725826a778..7bd8910f0fbf 100644 --- a/op-service/dial/active_l2_provider_test.go +++ b/op-service/dial/active_l2_provider_test.go @@ -601,16 +601,18 @@ func TestRollupProvider_ReturnsSameSequencerOnInactiveWithLongCheckDuration(t *t // Primary sequencer becomes inactive, but the provider won't check immediately due to longCheckDuration primarySequencer.ExpectSequencerActive(false, nil) - firstSequencerUsed, err := rollupProvider.RollupClient(context.Background()) require.NoError(t, err) require.Same(t, primarySequencer, firstSequencerUsed) + active, err := primarySequencer.SequencerActive(context.Background()) + require.NoError(t, err) + require.False(t, active) + secondSequencerUsed, err := rollupProvider.RollupClient(context.Background()) require.NoError(t, err) require.Same(t, primarySequencer, secondSequencerUsed) - // we do not assertAllExpectations here because we are expecting exactly that the provider _doesn't_ - // call `SequencerActive()` again, which would cause the expectations to fail. + ept.assertAllExpectations(t) } // TestEndpointProvider_ReturnsSameSequencerOnInactiveWithLongCheckDuration verifies that the ActiveL2EndpointProvider @@ -627,14 +629,16 @@ func TestEndpointProvider_ReturnsSameSequencerOnInactiveWithLongCheckDuration(t // Primary sequencer becomes inactive, but the provider won't check immediately due to longCheckDuration primarySequencer.ExpectSequencerActive(false, nil) - firstEthClientUsed, err := endpointProvider.EthClient(context.Background()) require.NoError(t, err) require.Same(t, ept.ethClients[0], firstEthClientUsed) + active, err := primarySequencer.SequencerActive(context.Background()) + require.NoError(t, err) + require.False(t, active) + secondEthClientUsed, err := endpointProvider.EthClient(context.Background()) require.NoError(t, err) require.Same(t, ept.ethClients[0], secondEthClientUsed) - // we do not assertAllExpectations here because we are expecting exactly that the provider _doesn't_ - // call `SequencerActive()` again, which would cause the expectations to fail. + ept.assertAllExpectations(t) } From a01b1399bf6207187a8b0a53195c9573ecd3d3ac Mon Sep 17 00:00:00 2001 From: EvanJRichard Date: Mon, 18 Dec 2023 13:22:51 -0500 Subject: [PATCH 27/41] use new error errSeqUnset. --- op-service/dial/active_rollup_provider.go | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/op-service/dial/active_rollup_provider.go b/op-service/dial/active_rollup_provider.go index 13c5207e6e1c..a47270a3839b 100644 --- a/op-service/dial/active_rollup_provider.go +++ b/op-service/dial/active_rollup_provider.go @@ -75,6 +75,8 @@ func newActiveL2RollupProvider( return p, nil } +var errSeqUnset = errors.New("sequencer unset") + func (p *ActiveL2RollupProvider) RollupClient(ctx context.Context) (RollupClientInterface, error) { p.clientLock.Lock() defer p.clientLock.Unlock() @@ -100,13 +102,6 @@ func (p *ActiveL2RollupProvider) shouldCheck() bool { return time.Now().After(p.activeTimeout) } -func (p *ActiveL2RollupProvider) currentEndpoint() string { - if p.rollupIndex < 0 { - return "no endpoint set" - } - return p.rollupUrls[p.rollupIndex] -} - func (p *ActiveL2RollupProvider) findActiveEndpoints(ctx context.Context) error { if p.currentRollupClient == nil { err := p.dialNextSequencer(ctx) @@ -116,8 +111,9 @@ func (p *ActiveL2RollupProvider) findActiveEndpoints(ctx context.Context) error } for range p.rollupUrls { active, err := p.checkCurrentSequencer(ctx) - ep := p.currentEndpoint() - if err != nil { + if errors.Is(err, errSeqUnset) { + log.Debug("Current sequencer unset.") + } else if ep := p.rollupUrls[p.rollupIndex]; err != nil { p.log.Warn("Error querying active sequencer, closing connection and trying next.", "err", err, "index", p.rollupIndex, "url", ep) } else if active { p.log.Debug("Current sequencer active.", "index", p.rollupIndex, "url", ep) @@ -133,6 +129,9 @@ func (p *ActiveL2RollupProvider) findActiveEndpoints(ctx context.Context) error } func (p *ActiveL2RollupProvider) checkCurrentSequencer(ctx context.Context) (bool, error) { + if p.currentRollupClient == nil { + return false, errSeqUnset + } cctx, cancel := context.WithTimeout(ctx, p.networkTimeout) defer cancel() return p.currentRollupClient.SequencerActive(cctx) @@ -147,7 +146,7 @@ func (p *ActiveL2RollupProvider) dialNextSequencer(ctx context.Context) error { defer cancel() p.rollupIndex = (p.rollupIndex + 1) % p.numEndpoints() - ep := p.currentEndpoint() + ep := p.rollupUrls[p.rollupIndex] p.log.Info("Dialing next sequencer.", "index", p.rollupIndex, "url", ep) rollupClient, err := p.rollupDialer(cctx, p.networkTimeout, p.log, ep) if err != nil { From c635a1b6562fffdd6cfa1486682a928c0ac2396d Mon Sep 17 00:00:00 2001 From: EvanJRichard Date: Mon, 18 Dec 2023 13:38:08 -0500 Subject: [PATCH 28/41] Add test for scenario where many sequencers are inactive, and only the last is active. --- op-service/dial/active_l2_provider_test.go | 39 ++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/op-service/dial/active_l2_provider_test.go b/op-service/dial/active_l2_provider_test.go index 7bd8910f0fbf..47a07fcc664e 100644 --- a/op-service/dial/active_l2_provider_test.go +++ b/op-service/dial/active_l2_provider_test.go @@ -385,6 +385,25 @@ func TestRollupProvider_SelectSecondSequencerIfFirstInactiveAtCreation(t *testin ept.assertAllExpectations(t) } +// TestRollupProvider_SelectLastSequencerIManyInactiveAtCreation verifies that if all but the last sequencer +// are inactive at the time of ActiveL2RollupProvider creation, the last active sequencer is chosen. +func TestRollupProvider_SelectLastSequencerIfManyInactiveAtCreation(t *testing.T) { + ept := setupEndpointProviderTest(t, 5) + + // First four sequencers are inactive, last sequencer is active + for i := 0; i < 4; i++ { + ept.rollupClients[i].ExpectSequencerActive(false, nil) + ept.rollupClients[i].ExpectClose() + } + ept.rollupClients[4].ExpectSequencerActive(true, nil) + + rollupProvider, err := ept.newActiveL2RollupProvider(0) + require.NoError(t, err) + + require.Same(t, ept.rollupClients[4], rollupProvider.currentRollupClient) + ept.assertAllExpectations(t) +} + // TestEndpointProvider_SelectSecondSequencerIfFirstInactiveAtCreation verifies that if the first sequencer // is inactive at the time of ActiveL2EndpointProvider creation, the second active sequencer is chosen. func TestEndpointProvider_SelectSecondSequencerIfFirstInactiveAtCreation(t *testing.T) { @@ -403,6 +422,26 @@ func TestEndpointProvider_SelectSecondSequencerIfFirstInactiveAtCreation(t *test ept.assertAllExpectations(t) } +// TestEndpointProvider_SelectLastSequencerIfManyInactiveAtCreation verifies that if all but the last sequencer +// are inactive at the time of ActiveL2EndpointProvider creation, the last active sequencer is chosen. +func TestEndpointProvider_SelectLastSequencerIfManyInactiveAtCreation(t *testing.T) { + ept := setupEndpointProviderTest(t, 5) + + // First four sequencers are inactive, last sequencer is active + for i := 0; i < 4; i++ { + ept.rollupClients[i].ExpectSequencerActive(false, nil) + ept.rollupClients[i].ExpectClose() + } + ept.rollupClients[4].ExpectSequencerActive(true, nil) + ept.rollupClients[4].ExpectSequencerActive(true, nil) // Double check due to embedded call of `EthClient()` + + endpointProvider, err := ept.newActiveL2EndpointProvider(0) + require.NoError(t, err) + + require.Same(t, ept.ethClients[4], endpointProvider.currentEthClient) + ept.assertAllExpectations(t) +} + // TestRollupProvider_ConstructorErrorOnFirstSequencerOffline verifies that the ActiveL2RollupProvider // constructor handles the case where the first sequencer (index 0) is offline at startup. func TestRollupProvider_ConstructorErrorOnFirstSequencerOffline(t *testing.T) { From 8177ca17868b5a9644cbcd3c29535c134f78e046 Mon Sep 17 00:00:00 2001 From: EvanJRichard Date: Mon, 18 Dec 2023 13:43:09 -0500 Subject: [PATCH 29/41] Readability change: move the on-creation initialization to its own function. --- op-service/dial/active_rollup_provider.go | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/op-service/dial/active_rollup_provider.go b/op-service/dial/active_rollup_provider.go index a47270a3839b..d8cf86a9e081 100644 --- a/op-service/dial/active_rollup_provider.go +++ b/op-service/dial/active_rollup_provider.go @@ -102,12 +102,17 @@ func (p *ActiveL2RollupProvider) shouldCheck() bool { return time.Now().After(p.activeTimeout) } +func (p *ActiveL2RollupProvider) ensureClientInitialized(ctx context.Context) error { + if p.currentRollupClient != nil { + return nil + } + return p.dialNextSequencer(ctx) +} + func (p *ActiveL2RollupProvider) findActiveEndpoints(ctx context.Context) error { - if p.currentRollupClient == nil { - err := p.dialNextSequencer(ctx) - if err != nil { - return fmt.Errorf("dialing first sequencer: %w", err) - } + err := p.ensureClientInitialized(ctx) + if err != nil { + return fmt.Errorf("initializing rollup client: %w", err) } for range p.rollupUrls { active, err := p.checkCurrentSequencer(ctx) From ee0191fdaa0caccbb77068085fa1b8d9923632f9 Mon Sep 17 00:00:00 2001 From: EvanJRichard Date: Mon, 18 Dec 2023 13:57:22 -0500 Subject: [PATCH 30/41] Move extra one-time dial to constructor. --- op-service/dial/active_rollup_provider.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/op-service/dial/active_rollup_provider.go b/op-service/dial/active_rollup_provider.go index d8cf86a9e081..d62e029d759d 100644 --- a/op-service/dial/active_rollup_provider.go +++ b/op-service/dial/active_rollup_provider.go @@ -68,7 +68,11 @@ func newActiveL2RollupProvider( } cctx, cancel := context.WithTimeout(ctx, networkTimeout) defer cancel() - _, err := p.RollupClient(cctx) + err := p.ensureClientInitialized(cctx) + if err != nil { + return nil, fmt.Errorf("dialing initial rollup client: %w", err) + } + _, err = p.RollupClient(cctx) if err != nil { return nil, fmt.Errorf("setting provider rollup client: %w", err) } @@ -110,10 +114,6 @@ func (p *ActiveL2RollupProvider) ensureClientInitialized(ctx context.Context) er } func (p *ActiveL2RollupProvider) findActiveEndpoints(ctx context.Context) error { - err := p.ensureClientInitialized(ctx) - if err != nil { - return fmt.Errorf("initializing rollup client: %w", err) - } for range p.rollupUrls { active, err := p.checkCurrentSequencer(ctx) if errors.Is(err, errSeqUnset) { From f8efb3c08689d456cd26f354c83d79815913ab8a Mon Sep 17 00:00:00 2001 From: Evan Richard <5766842+EvanJRichard@users.noreply.github.com> Date: Mon, 18 Dec 2023 14:35:34 -0500 Subject: [PATCH 31/41] Update op-service/dial/active_rollup_provider.go Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- op-service/dial/active_rollup_provider.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/op-service/dial/active_rollup_provider.go b/op-service/dial/active_rollup_provider.go index d62e029d759d..33b0c2a656f2 100644 --- a/op-service/dial/active_rollup_provider.go +++ b/op-service/dial/active_rollup_provider.go @@ -165,5 +165,7 @@ func (p *ActiveL2RollupProvider) dialNextSequencer(ctx context.Context) error { } func (p *ActiveL2RollupProvider) Close() { - p.currentRollupClient.Close() + if p.currentRollupClient != nil { + p.currentRollupClient.Close() + } } From 73e749e2c14565bda38f05670da7bfb4f721d15c Mon Sep 17 00:00:00 2001 From: EvanJRichard Date: Mon, 18 Dec 2023 14:39:17 -0500 Subject: [PATCH 32/41] Add nil check to active l2 provider. --- op-service/dial/active_l2_provider.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/op-service/dial/active_l2_provider.go b/op-service/dial/active_l2_provider.go index 4da81a9b0a77..7c1b071214c7 100644 --- a/op-service/dial/active_l2_provider.go +++ b/op-service/dial/active_l2_provider.go @@ -105,6 +105,8 @@ func (p *ActiveL2EndpointProvider) EthClient(ctx context.Context) (EthClientInte } func (p *ActiveL2EndpointProvider) Close() { - p.currentEthClient.Close() + if p.currentEthClient != nil { + p.currentEthClient.Close() + } p.ActiveL2RollupProvider.Close() } From e54448e1bfb7f58435d443f018b3f836b1be99b4 Mon Sep 17 00:00:00 2001 From: Evan Richard <5766842+EvanJRichard@users.noreply.github.com> Date: Mon, 18 Dec 2023 15:18:17 -0500 Subject: [PATCH 33/41] Update op-service/dial/active_rollup_provider.go Co-authored-by: Sebastian Stammler --- op-service/dial/active_rollup_provider.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/op-service/dial/active_rollup_provider.go b/op-service/dial/active_rollup_provider.go index 33b0c2a656f2..bb681e3a57d6 100644 --- a/op-service/dial/active_rollup_provider.go +++ b/op-service/dial/active_rollup_provider.go @@ -127,7 +127,7 @@ func (p *ActiveL2RollupProvider) findActiveEndpoints(ctx context.Context) error p.log.Info("Current sequencer inactive, closing connection and trying next.", "index", p.rollupIndex, "url", ep) } if err := p.dialNextSequencer(ctx); err != nil { - return fmt.Errorf("dialing next sequencer: %w", err) + p.log.Warn("Error dialing next sequencer.", "err", err, "index", p.rollupIndex) } } return fmt.Errorf("failed to find an active sequencer, tried following urls: %v", p.rollupUrls) From 919e5980886e78653c5e13a68cff47f3a549a80a Mon Sep 17 00:00:00 2001 From: EvanJRichard Date: Mon, 18 Dec 2023 15:46:08 -0500 Subject: [PATCH 34/41] Address review comment: change many-inactive tests to many-undialable tests. --- op-service/dial/active_l2_provider_test.go | 55 ++++++++++++++-------- op-service/dial/active_rollup_provider.go | 2 +- 2 files changed, 37 insertions(+), 20 deletions(-) diff --git a/op-service/dial/active_l2_provider_test.go b/op-service/dial/active_l2_provider_test.go index 47a07fcc664e..3f2f13b127e9 100644 --- a/op-service/dial/active_l2_provider_test.go +++ b/op-service/dial/active_l2_provider_test.go @@ -14,22 +14,28 @@ import ( // endpointProviderTest is a test harness for setting up endpoint provider tests. type endpointProviderTest struct { - t *testing.T - rollupClients []*testutils.MockRollupClient - ethClients []*testutils.MockEthClient + t *testing.T + rollupClients []*testutils.MockRollupClient + ethClients []*testutils.MockEthClient + rollupDialOutcomes map[int]bool // true for success, false for failure + ethDialOutcomes map[int]bool // true for success, false for failure } // setupEndpointProviderTest sets up the basic structure of the endpoint provider tests. func setupEndpointProviderTest(t *testing.T, numSequencers int) *endpointProviderTest { ept := &endpointProviderTest{ - t: t, - rollupClients: make([]*testutils.MockRollupClient, numSequencers), - ethClients: make([]*testutils.MockEthClient, numSequencers), + t: t, + rollupClients: make([]*testutils.MockRollupClient, numSequencers), + ethClients: make([]*testutils.MockEthClient, numSequencers), + rollupDialOutcomes: make(map[int]bool), + ethDialOutcomes: make(map[int]bool), } for i := 0; i < numSequencers; i++ { ept.rollupClients[i] = new(testutils.MockRollupClient) ept.ethClients[i] = new(testutils.MockEthClient) + ept.rollupDialOutcomes[i] = true // by default, all dials succeed + ept.ethDialOutcomes[i] = true // by default, all dials succeed } return ept @@ -37,9 +43,12 @@ func setupEndpointProviderTest(t *testing.T, numSequencers int) *endpointProvide // newActiveL2EndpointProvider constructs a new ActiveL2RollupProvider using the test harness setup. func (et *endpointProviderTest) newActiveL2RollupProvider(checkDuration time.Duration) (*ActiveL2RollupProvider, error) { - rollupDialer := func(ctx context.Context, timeout time.Duration, log log.Logger, url string) (RollupClientInterface, error) { + mockRollupDialer := func(ctx context.Context, timeout time.Duration, log log.Logger, url string) (RollupClientInterface, error) { for i, client := range et.rollupClients { if url == fmt.Sprintf("rollup%d", i) { + if !et.rollupDialOutcomes[i] { + return nil, fmt.Errorf("simulated dial failure for rollup %d", i) + } return client, nil } } @@ -59,7 +68,7 @@ func (et *endpointProviderTest) newActiveL2RollupProvider(checkDuration time.Dur checkDuration, 1*time.Minute, testlog.Logger(et.t, log.LvlDebug), - rollupDialer, + mockRollupDialer, ) } @@ -68,6 +77,9 @@ func (et *endpointProviderTest) newActiveL2EndpointProvider(checkDuration time.D mockRollupDialer := func(ctx context.Context, timeout time.Duration, log log.Logger, url string) (RollupClientInterface, error) { for i, client := range et.rollupClients { if url == fmt.Sprintf("rollup%d", i) { + if !et.rollupDialOutcomes[i] { + return nil, fmt.Errorf("simulated dial failure for rollup %d", i) + } return client, nil } } @@ -77,6 +89,9 @@ func (et *endpointProviderTest) newActiveL2EndpointProvider(checkDuration time.D mockEthDialer := func(ctx context.Context, timeout time.Duration, log log.Logger, url string) (EthClientInterface, error) { for i, client := range et.ethClients { if url == fmt.Sprintf("eth%d", i) { + if !et.ethDialOutcomes[i] { + return nil, fmt.Errorf("simulated dial failure for eth %d", i) + } return client, nil } } @@ -116,6 +131,10 @@ func (et *endpointProviderTest) assertAllExpectations(t *testing.T) { } } +func (ept *endpointProviderTest) setRollupDialOutcome(index int, success bool) { + ept.rollupDialOutcomes[index] = success +} + // TestRollupProvider_FailoverOnInactiveSequencer verifies that the ActiveL2RollupProvider // will switch to the next provider if the current one becomes inactive. func TestRollupProvider_FailoverOnInactiveSequencer(t *testing.T) { @@ -385,15 +404,14 @@ func TestRollupProvider_SelectSecondSequencerIfFirstInactiveAtCreation(t *testin ept.assertAllExpectations(t) } -// TestRollupProvider_SelectLastSequencerIManyInactiveAtCreation verifies that if all but the last sequencer -// are inactive at the time of ActiveL2RollupProvider creation, the last active sequencer is chosen. -func TestRollupProvider_SelectLastSequencerIfManyInactiveAtCreation(t *testing.T) { +// TestRollupProvider_SelectLastSequencerIfManyOfflineAtCreation verifies that if all but the last sequencer +// are offline at the time of ActiveL2RollupProvider creation, the last active sequencer is chosen. +func TestRollupProvider_SelectLastSequencerIfManyOfflineAtCreation(t *testing.T) { ept := setupEndpointProviderTest(t, 5) - // First four sequencers are inactive, last sequencer is active + // First four sequencers are dead, last sequencer is active for i := 0; i < 4; i++ { - ept.rollupClients[i].ExpectSequencerActive(false, nil) - ept.rollupClients[i].ExpectClose() + ept.setRollupDialOutcome(i, false) } ept.rollupClients[4].ExpectSequencerActive(true, nil) @@ -404,9 +422,9 @@ func TestRollupProvider_SelectLastSequencerIfManyInactiveAtCreation(t *testing.T ept.assertAllExpectations(t) } -// TestEndpointProvider_SelectSecondSequencerIfFirstInactiveAtCreation verifies that if the first sequencer +// TestEndpointProvider_SelectSecondSequencerIfFirstOfflineAtCreation verifies that if the first sequencer // is inactive at the time of ActiveL2EndpointProvider creation, the second active sequencer is chosen. -func TestEndpointProvider_SelectSecondSequencerIfFirstInactiveAtCreation(t *testing.T) { +func TestEndpointProvider_SelectSecondSequencerIfFirstOfflineAtCreation(t *testing.T) { ept := setupEndpointProviderTest(t, 2) // First sequencer is inactive, second sequencer is active @@ -427,10 +445,9 @@ func TestEndpointProvider_SelectSecondSequencerIfFirstInactiveAtCreation(t *test func TestEndpointProvider_SelectLastSequencerIfManyInactiveAtCreation(t *testing.T) { ept := setupEndpointProviderTest(t, 5) - // First four sequencers are inactive, last sequencer is active + // First four sequencers are dead, last sequencer is active for i := 0; i < 4; i++ { - ept.rollupClients[i].ExpectSequencerActive(false, nil) - ept.rollupClients[i].ExpectClose() + ept.setRollupDialOutcome(i, false) } ept.rollupClients[4].ExpectSequencerActive(true, nil) ept.rollupClients[4].ExpectSequencerActive(true, nil) // Double check due to embedded call of `EthClient()` diff --git a/op-service/dial/active_rollup_provider.go b/op-service/dial/active_rollup_provider.go index bb681e3a57d6..a6fe799b2f7f 100644 --- a/op-service/dial/active_rollup_provider.go +++ b/op-service/dial/active_rollup_provider.go @@ -70,7 +70,7 @@ func newActiveL2RollupProvider( defer cancel() err := p.ensureClientInitialized(cctx) if err != nil { - return nil, fmt.Errorf("dialing initial rollup client: %w", err) + p.log.Warn("Error dialing initial rollup client.", "err", err) } _, err = p.RollupClient(cctx) if err != nil { From 26d46b78e51903d3e5e7acd17b125a8f756233da Mon Sep 17 00:00:00 2001 From: EvanJRichard Date: Tue, 19 Dec 2023 10:07:39 -0500 Subject: [PATCH 35/41] Add test that reproduces internal state corruption. --- op-service/dial/active_l2_provider_test.go | 82 +++++++++++++++++++++- op-service/testutils/mock_rollup_client.go | 4 ++ 2 files changed, 84 insertions(+), 2 deletions(-) diff --git a/op-service/dial/active_l2_provider_test.go b/op-service/dial/active_l2_provider_test.go index 3f2f13b127e9..2d633426175c 100644 --- a/op-service/dial/active_l2_provider_test.go +++ b/op-service/dial/active_l2_provider_test.go @@ -131,8 +131,8 @@ func (et *endpointProviderTest) assertAllExpectations(t *testing.T) { } } -func (ept *endpointProviderTest) setRollupDialOutcome(index int, success bool) { - ept.rollupDialOutcomes[index] = success +func (et *endpointProviderTest) setRollupDialOutcome(index int, success bool) { + et.rollupDialOutcomes[index] = success } // TestRollupProvider_FailoverOnInactiveSequencer verifies that the ActiveL2RollupProvider @@ -698,3 +698,81 @@ func TestEndpointProvider_ReturnsSameSequencerOnInactiveWithLongCheckDuration(t require.Same(t, ept.ethClients[0], secondEthClientUsed) ept.assertAllExpectations(t) } + +// TestRollupProvider_HandlesIndexClientMismatch verifies +func TestRollupProvider_HandlesIndexClientMismatch(t *testing.T) { + ept := setupEndpointProviderTest(t, 2) + primarySequencer, secondarySequencer := ept.rollupClients[0], ept.rollupClients[1] + + // primarySequencer is active on creation + primarySequencer.ExpectSequencerActive(true, nil) // active on creation + rollupProvider, err := ept.newActiveL2RollupProvider(0) + require.NoError(t, err) + + primarySequencer.ExpectSequencerActive(false, nil) // ZDD: sequencer goes down + primarySequencer.ExpectClose() + primarySequencer.ExpectSequencerActive(false, nil) // buggy behavior: we shouldn't have to expect this twice! A fixed version will let us remove this + ept.setRollupDialOutcome(1, false) // secondarySequencer fails to dial + // now get the rollupClient, we expect an error + rollupClient, err := rollupProvider.RollupClient(context.Background()) + require.Error(t, err) + require.Nil(t, rollupClient) + require.Same(t, primarySequencer, rollupProvider.currentRollupClient) // if this passes, it's a bug + require.Equal(t, 1, rollupProvider.rollupIndex) + + // now, 0 is still inactive, but 1 becomes dialable and active + primarySequencer.ExpectSequencerActive(false, nil) + primarySequencer.ExpectClose() + secondarySequencer.ExpectSequencerActive(true, nil) + ept.setRollupDialOutcome(1, true) // secondarySequencer dials successfully + + rollupClient, err = rollupProvider.RollupClient(context.Background()) + require.NoError(t, err) + require.Same(t, secondarySequencer, rollupClient) + ept.assertAllExpectations(t) +} + +// what if we did a test with 3 sequencers? +func TestRollupProvider_HandlesManyIndexClientMismatch(t *testing.T) { + ept := setupEndpointProviderTest(t, 3) + seq0, seq1, seq2 := ept.rollupClients[0], ept.rollupClients[1], ept.rollupClients[2] + + // "start happy": primarySequencer is active on creation + seq0.ExpectSequencerActive(true, nil) // active on creation + rollupProvider, err := ept.newActiveL2RollupProvider(0) + require.NoError(t, err) + + // primarySequencer goes down + seq0.ExpectSequencerActive(false, fmt.Errorf("I'm offline now")) + seq0.ExpectClose() + ept.setRollupDialOutcome(0, false) // primarySequencer fails to dial + // secondarySequencer is inactive, but online + seq1.ExpectSequencerActive(false, nil) + seq1.ExpectClose() + seq1.ExpectSequencerActive(false, nil) // a non-buggy impl shouldn't need this line. + // tertiarySequencer can't even be dialed + ept.setRollupDialOutcome(2, false) + // after calling RollupClient, index will be set to 0, but currentRollupClient is secondarySequencer + rollupClient, err := rollupProvider.RollupClient(context.Background()) + require.Error(t, err) + require.Nil(t, rollupClient) + require.Same(t, seq1, rollupProvider.currentRollupClient) // if this passes, it's a bug! a non-buggy impl would be nil, or at least seq0 - not seq1 + require.Equal(t, 0, rollupProvider.rollupIndex) + // internal state is now inconsistent in the buggy impl. + + // now seq0 is dialable and active + ept.setRollupDialOutcome(0, true) + seq0.ExpectSequencerActive(true, nil) + seq0.MaybeClose() + // now seq1 and seq2 are dialable, but inactive + ept.setRollupDialOutcome(1, true) + seq1.ExpectSequencerActive(false, nil) + seq1.MaybeClose() + ept.setRollupDialOutcome(2, true) + seq2.ExpectSequencerActive(false, nil) + seq2.MaybeClose() + // now trigger the bug by calling for the client + rollupClient, err = rollupProvider.RollupClient(context.Background()) + require.NoError(t, err) + require.Same(t, seq0, rollupClient) +} diff --git a/op-service/testutils/mock_rollup_client.go b/op-service/testutils/mock_rollup_client.go index a6e79c996ad3..637799b9d325 100644 --- a/op-service/testutils/mock_rollup_client.go +++ b/op-service/testutils/mock_rollup_client.go @@ -62,6 +62,10 @@ func (m *MockRollupClient) ExpectClose() { m.Mock.On("Close").Once() } +func (m *MockRollupClient) MaybeClose() { + m.Mock.On("Close").Once().Maybe() +} + func (m *MockRollupClient) Close() { m.Mock.Called() } From 6f76ca8c5e712cdbfdbda41b3f94c2c7dc73c3e2 Mon Sep 17 00:00:00 2001 From: Sebastian Stammler Date: Tue, 19 Dec 2023 00:33:25 +0100 Subject: [PATCH 36/41] op-service: Improve active seq provider - Preserve the invariant that the index and current rollup/eth client match. - Dial at the start of the loop instead of at the end. --- op-service/dial/active_l2_provider.go | 16 +++-- op-service/dial/active_rollup_provider.go | 78 ++++++++++++----------- 2 files changed, 51 insertions(+), 43 deletions(-) diff --git a/op-service/dial/active_l2_provider.go b/op-service/dial/active_l2_provider.go index 7c1b071214c7..5eb3fadbc050 100644 --- a/op-service/dial/active_l2_provider.go +++ b/op-service/dial/active_l2_provider.go @@ -34,11 +34,13 @@ func NewActiveL2EndpointProvider(ctx context.Context, logger log.Logger, ) (*ActiveL2EndpointProvider, error) { ethDialer := func(ctx context.Context, timeout time.Duration, - log log.Logger, url string) (EthClientInterface, error) { + log log.Logger, url string, + ) (EthClientInterface, error) { return DialEthClientWithTimeout(ctx, timeout, log, url) } rollupDialer := func(ctx context.Context, timeout time.Duration, - log log.Logger, url string) (RollupClientInterface, error) { + log log.Logger, url string, + ) (RollupClientInterface, error) { return DialRollupClientWithTimeout(ctx, timeout, log, url) } return newActiveL2EndpointProvider(ctx, ethUrls, rollupUrls, checkDuration, networkTimeout, logger, ethDialer, rollupDialer) @@ -71,8 +73,7 @@ func newActiveL2EndpointProvider( } cctx, cancel := context.WithTimeout(ctx, networkTimeout) defer cancel() - _, err = p.EthClient(cctx) - if err != nil { + if _, err = p.EthClient(cctx); err != nil { return nil, fmt.Errorf("setting provider eth client: %w", err) } return p, nil @@ -89,9 +90,9 @@ func (p *ActiveL2EndpointProvider) EthClient(ctx context.Context) (EthClientInte // we changed sequencers, dial a new EthClient cctx, cancel := context.WithTimeout(ctx, p.networkTimeout) defer cancel() - p.ethClientIndex = p.rollupIndex - ep := p.ethUrls[p.ethClientIndex] - log.Info("sequencer changed (or ethClient was nil due to startup), dialing new eth client", "new_index", p.rollupIndex, "new_url", ep) + idx := p.rollupIndex + ep := p.ethUrls[idx] + log.Info("sequencer changed (or ethClient was nil due to startup), dialing new eth client", "new_index", idx, "new_url", ep) ethClient, err := p.ethDialer(cctx, p.networkTimeout, p.log, ep) if err != nil { return nil, fmt.Errorf("dialing eth client: %w", err) @@ -99,6 +100,7 @@ func (p *ActiveL2EndpointProvider) EthClient(ctx context.Context) (EthClientInte if p.currentEthClient != nil { p.currentEthClient.Close() } + p.ethClientIndex = idx p.currentEthClient = ethClient } return p.currentEthClient, nil diff --git a/op-service/dial/active_rollup_provider.go b/op-service/dial/active_rollup_provider.go index a6fe799b2f7f..d982559c499c 100644 --- a/op-service/dial/active_rollup_provider.go +++ b/op-service/dial/active_rollup_provider.go @@ -40,7 +40,8 @@ func NewActiveL2RollupProvider( logger log.Logger, ) (*ActiveL2RollupProvider, error) { rollupDialer := func(ctx context.Context, timeout time.Duration, - log log.Logger, url string) (RollupClientInterface, error) { + log log.Logger, url string, + ) (RollupClientInterface, error) { return DialRollupClientWithTimeout(ctx, timeout, log, url) } return newActiveL2RollupProvider(ctx, rollupUrls, checkDuration, networkTimeout, logger, rollupDialer) @@ -64,23 +65,16 @@ func newActiveL2RollupProvider( rollupUrls: rollupUrls, rollupDialer: dialer, clientLock: &sync.Mutex{}, - rollupIndex: -1, } cctx, cancel := context.WithTimeout(ctx, networkTimeout) defer cancel() - err := p.ensureClientInitialized(cctx) - if err != nil { - p.log.Warn("Error dialing initial rollup client.", "err", err) - } - _, err = p.RollupClient(cctx) - if err != nil { + + if _, err := p.RollupClient(cctx); err != nil { return nil, fmt.Errorf("setting provider rollup client: %w", err) } return p, nil } -var errSeqUnset = errors.New("sequencer unset") - func (p *ActiveL2RollupProvider) RollupClient(ctx context.Context) (RollupClientInterface, error) { p.clientLock.Lock() defer p.clientLock.Unlock() @@ -106,37 +100,46 @@ func (p *ActiveL2RollupProvider) shouldCheck() bool { return time.Now().After(p.activeTimeout) } -func (p *ActiveL2RollupProvider) ensureClientInitialized(ctx context.Context) error { - if p.currentRollupClient != nil { - return nil - } - return p.dialNextSequencer(ctx) -} - func (p *ActiveL2RollupProvider) findActiveEndpoints(ctx context.Context) error { - for range p.rollupUrls { - active, err := p.checkCurrentSequencer(ctx) - if errors.Is(err, errSeqUnset) { - log.Debug("Current sequencer unset.") - } else if ep := p.rollupUrls[p.rollupIndex]; err != nil { - p.log.Warn("Error querying active sequencer, closing connection and trying next.", "err", err, "index", p.rollupIndex, "url", ep) + startIdx := p.rollupIndex + var errs error + for offset := range p.rollupUrls { + idx := (startIdx + offset) % p.numEndpoints() + if offset != 0 || p.currentRollupClient == nil { + if err := p.dialSequencer(ctx, idx); err != nil { + errs = errors.Join(errs, err) + p.log.Warn("Error dialing next sequencer.", "err", err, "index", p.rollupIndex) + continue + } + } + + ep := p.rollupUrls[idx] + if active, err := p.checkCurrentSequencer(ctx); err != nil { + errs = errors.Join(errs, err) + p.log.Warn("Error querying active sequencer, trying next.", "err", err, "index", idx, "url", ep) } else if active { - p.log.Debug("Current sequencer active.", "index", p.rollupIndex, "url", ep) + if offset == 0 { + p.log.Debug("Current sequencer active.", "index", idx, "url", ep) + } else { + p.log.Info("Found new active sequencer.", "index", idx, "url", ep) + } return nil } else { - p.log.Info("Current sequencer inactive, closing connection and trying next.", "index", p.rollupIndex, "url", ep) - } - if err := p.dialNextSequencer(ctx); err != nil { - p.log.Warn("Error dialing next sequencer.", "err", err, "index", p.rollupIndex) + p.log.Info("Sequencer inactive, trying next.", "index", idx, "url", ep) } } - return fmt.Errorf("failed to find an active sequencer, tried following urls: %v", p.rollupUrls) + + // TODO: We exhausted the list of sequencers, close and remove latest client? + // It definitely makes all the existing tests pass, but maybe we remove + // this and adjust the tests, if it makes sense. + if p.currentRollupClient != nil { + p.currentRollupClient.Close() + p.currentRollupClient = nil + } + return fmt.Errorf("failed to find an active sequencer, tried following urls: %v; errs: %w", p.rollupUrls, errs) } func (p *ActiveL2RollupProvider) checkCurrentSequencer(ctx context.Context) (bool, error) { - if p.currentRollupClient == nil { - return false, errSeqUnset - } cctx, cancel := context.WithTimeout(ctx, p.networkTimeout) defer cancel() return p.currentRollupClient.SequencerActive(cctx) @@ -146,13 +149,15 @@ func (p *ActiveL2RollupProvider) numEndpoints() int { return len(p.rollupUrls) } -func (p *ActiveL2RollupProvider) dialNextSequencer(ctx context.Context) error { +// dialSequencer dials the sequencer for the url at the given index. +// If successful, the currentRollupClient and rollupIndex are updated and the +// old rollup client is closed. +func (p *ActiveL2RollupProvider) dialSequencer(ctx context.Context, idx int) error { cctx, cancel := context.WithTimeout(ctx, p.networkTimeout) defer cancel() - p.rollupIndex = (p.rollupIndex + 1) % p.numEndpoints() - ep := p.rollupUrls[p.rollupIndex] - p.log.Info("Dialing next sequencer.", "index", p.rollupIndex, "url", ep) + ep := p.rollupUrls[idx] + p.log.Info("Dialing next sequencer.", "index", idx, "url", ep) rollupClient, err := p.rollupDialer(cctx, p.networkTimeout, p.log, ep) if err != nil { return fmt.Errorf("dialing rollup client: %w", err) @@ -160,6 +165,7 @@ func (p *ActiveL2RollupProvider) dialNextSequencer(ctx context.Context) error { if p.currentRollupClient != nil { p.currentRollupClient.Close() } + p.rollupIndex = idx p.currentRollupClient = rollupClient return nil } From aaaf31265d137b90f365b235d426f5d6f022603a Mon Sep 17 00:00:00 2001 From: EvanJRichard Date: Tue, 19 Dec 2023 10:10:11 -0500 Subject: [PATCH 37/41] Fix some tests. --- op-service/dial/active_l2_provider_test.go | 38 ++-------------------- 1 file changed, 2 insertions(+), 36 deletions(-) diff --git a/op-service/dial/active_l2_provider_test.go b/op-service/dial/active_l2_provider_test.go index 2d633426175c..1063d0786c67 100644 --- a/op-service/dial/active_l2_provider_test.go +++ b/op-service/dial/active_l2_provider_test.go @@ -699,40 +699,8 @@ func TestEndpointProvider_ReturnsSameSequencerOnInactiveWithLongCheckDuration(t ept.assertAllExpectations(t) } -// TestRollupProvider_HandlesIndexClientMismatch verifies -func TestRollupProvider_HandlesIndexClientMismatch(t *testing.T) { - ept := setupEndpointProviderTest(t, 2) - primarySequencer, secondarySequencer := ept.rollupClients[0], ept.rollupClients[1] - - // primarySequencer is active on creation - primarySequencer.ExpectSequencerActive(true, nil) // active on creation - rollupProvider, err := ept.newActiveL2RollupProvider(0) - require.NoError(t, err) - - primarySequencer.ExpectSequencerActive(false, nil) // ZDD: sequencer goes down - primarySequencer.ExpectClose() - primarySequencer.ExpectSequencerActive(false, nil) // buggy behavior: we shouldn't have to expect this twice! A fixed version will let us remove this - ept.setRollupDialOutcome(1, false) // secondarySequencer fails to dial - // now get the rollupClient, we expect an error - rollupClient, err := rollupProvider.RollupClient(context.Background()) - require.Error(t, err) - require.Nil(t, rollupClient) - require.Same(t, primarySequencer, rollupProvider.currentRollupClient) // if this passes, it's a bug - require.Equal(t, 1, rollupProvider.rollupIndex) - - // now, 0 is still inactive, but 1 becomes dialable and active - primarySequencer.ExpectSequencerActive(false, nil) - primarySequencer.ExpectClose() - secondarySequencer.ExpectSequencerActive(true, nil) - ept.setRollupDialOutcome(1, true) // secondarySequencer dials successfully - - rollupClient, err = rollupProvider.RollupClient(context.Background()) - require.NoError(t, err) - require.Same(t, secondarySequencer, rollupClient) - ept.assertAllExpectations(t) -} - -// what if we did a test with 3 sequencers? +// TestRollupProvider_HandlesManyIndexClientMismatch verifies that the ActiveL2RollupProvider avoids +// the case where the index of the current sequencer does not match the index of the current rollup client. func TestRollupProvider_HandlesManyIndexClientMismatch(t *testing.T) { ept := setupEndpointProviderTest(t, 3) seq0, seq1, seq2 := ept.rollupClients[0], ept.rollupClients[1], ept.rollupClients[2] @@ -756,8 +724,6 @@ func TestRollupProvider_HandlesManyIndexClientMismatch(t *testing.T) { rollupClient, err := rollupProvider.RollupClient(context.Background()) require.Error(t, err) require.Nil(t, rollupClient) - require.Same(t, seq1, rollupProvider.currentRollupClient) // if this passes, it's a bug! a non-buggy impl would be nil, or at least seq0 - not seq1 - require.Equal(t, 0, rollupProvider.rollupIndex) // internal state is now inconsistent in the buggy impl. // now seq0 is dialable and active From 85568d9e12574ce08cb171177d72acbb84046d50 Mon Sep 17 00:00:00 2001 From: EvanJRichard Date: Tue, 19 Dec 2023 10:25:19 -0500 Subject: [PATCH 38/41] Move usage of ExpectClose to MaybeClose, we don't want to enforce a particular close behavior in these tests. --- op-service/dial/active_l2_provider_test.go | 48 +++++++++++----------- op-service/dial/active_rollup_provider.go | 8 ---- op-service/testutils/mock_eth_client.go | 4 ++ op-service/testutils/mock_rollup_client.go | 2 +- 4 files changed, 29 insertions(+), 33 deletions(-) diff --git a/op-service/dial/active_l2_provider_test.go b/op-service/dial/active_l2_provider_test.go index 1063d0786c67..90cfd9bc751a 100644 --- a/op-service/dial/active_l2_provider_test.go +++ b/op-service/dial/active_l2_provider_test.go @@ -152,7 +152,7 @@ func TestRollupProvider_FailoverOnInactiveSequencer(t *testing.T) { require.Same(t, primarySequencer, firstSequencerUsed) primarySequencer.ExpectSequencerActive(false, nil) // become inactive after that - primarySequencer.ExpectClose() + primarySequencer.MaybeClose() secondarySequencer.ExpectSequencerActive(true, nil) secondSequencerUsed, err := rollupProvider.RollupClient(context.Background()) require.NoError(t, err) @@ -180,8 +180,8 @@ func TestEndpointProvider_FailoverOnInactiveSequencer(t *testing.T) { primarySequencer.ExpectSequencerActive(false, nil) // become inactive after that secondarySequencer.ExpectSequencerActive(true, nil) - primarySequencer.ExpectClose() - ept.ethClients[0].ExpectClose() // we close the ethclient when we switch over to the next sequencer + primarySequencer.MaybeClose() + ept.ethClients[0].MaybeClose() // we close the ethclient when we switch over to the next sequencer secondSequencerUsed, err := activeProvider.EthClient(context.Background()) require.NoError(t, err) require.Same(t, ept.ethClients[1], secondSequencerUsed) @@ -205,7 +205,7 @@ func TestRollupProvider_FailoverOnErroredSequencer(t *testing.T) { require.Same(t, primarySequencer, firstSequencerUsed) primarySequencer.ExpectSequencerActive(true, fmt.Errorf("a test error")) // error-out after that - primarySequencer.ExpectClose() + primarySequencer.MaybeClose() secondarySequencer.ExpectSequencerActive(true, nil) secondSequencerUsed, err := rollupProvider.RollupClient(context.Background()) require.NoError(t, err) @@ -232,8 +232,8 @@ func TestEndpointProvider_FailoverOnErroredSequencer(t *testing.T) { require.Same(t, primaryEthClient, firstSequencerUsed) primarySequencer.ExpectSequencerActive(true, fmt.Errorf("a test error")) // error out after that - primarySequencer.ExpectClose() - primaryEthClient.ExpectClose() + primarySequencer.MaybeClose() + primaryEthClient.MaybeClose() secondarySequencer.ExpectSequencerActive(true, nil) secondSequencerUsed, err := activeProvider.EthClient(context.Background()) @@ -296,7 +296,7 @@ func TestRollupProvider_FailoverAndReturn(t *testing.T) { // Primary becomes inactive, secondary active primarySequencer.ExpectSequencerActive(false, nil) - primarySequencer.ExpectClose() + primarySequencer.MaybeClose() secondarySequencer.ExpectSequencerActive(true, nil) // Fails over to secondary @@ -307,7 +307,7 @@ func TestRollupProvider_FailoverAndReturn(t *testing.T) { // Primary becomes active again, secondary becomes inactive primarySequencer.ExpectSequencerActive(true, nil) secondarySequencer.ExpectSequencerActive(false, nil) - secondarySequencer.ExpectClose() + secondarySequencer.MaybeClose() // Should return to primary thirdSequencerUsed, err := rollupProvider.RollupClient(context.Background()) @@ -330,8 +330,8 @@ func TestEndpointProvider_FailoverAndReturn(t *testing.T) { // Primary becomes inactive, secondary active primarySequencer.ExpectSequencerActive(false, nil) - primarySequencer.ExpectClose() - ept.ethClients[0].ExpectClose() + primarySequencer.MaybeClose() + ept.ethClients[0].MaybeClose() secondarySequencer.ExpectSequencerActive(true, nil) // Fails over to secondary @@ -342,8 +342,8 @@ func TestEndpointProvider_FailoverAndReturn(t *testing.T) { // Primary becomes active again, secondary becomes inactive primarySequencer.ExpectSequencerActive(true, nil) secondarySequencer.ExpectSequencerActive(false, nil) - secondarySequencer.ExpectClose() - ept.ethClients[1].ExpectClose() + secondarySequencer.MaybeClose() + ept.ethClients[1].MaybeClose() // // Should return to primary thirdSequencerUsed, err := endpointProvider.EthClient(context.Background()) @@ -394,7 +394,7 @@ func TestRollupProvider_SelectSecondSequencerIfFirstInactiveAtCreation(t *testin // First sequencer is inactive, second sequencer is active ept.rollupClients[0].ExpectSequencerActive(false, nil) - ept.rollupClients[0].ExpectClose() + ept.rollupClients[0].MaybeClose() ept.rollupClients[1].ExpectSequencerActive(true, nil) rollupProvider, err := ept.newActiveL2RollupProvider(0) @@ -429,7 +429,7 @@ func TestEndpointProvider_SelectSecondSequencerIfFirstOfflineAtCreation(t *testi // First sequencer is inactive, second sequencer is active ept.rollupClients[0].ExpectSequencerActive(false, nil) - ept.rollupClients[0].ExpectClose() + ept.rollupClients[0].MaybeClose() ept.rollupClients[1].ExpectSequencerActive(true, nil) ept.rollupClients[1].ExpectSequencerActive(true, nil) // see comment in other tests about why we expect this twice @@ -466,7 +466,7 @@ func TestRollupProvider_ConstructorErrorOnFirstSequencerOffline(t *testing.T) { // First sequencer is dead, second sequencer is active ept.rollupClients[0].ExpectSequencerActive(false, fmt.Errorf("I am offline")) - ept.rollupClients[0].ExpectClose() + ept.rollupClients[0].MaybeClose() ept.rollupClients[1].ExpectSequencerActive(true, nil) rollupProvider, err := ept.newActiveL2RollupProvider(0) @@ -483,7 +483,7 @@ func TestEndpointProvider_ConstructorErrorOnFirstSequencerOffline(t *testing.T) // First sequencer is dead, second sequencer is active ept.rollupClients[0].ExpectSequencerActive(false, fmt.Errorf("I am offline")) - ept.rollupClients[0].ExpectClose() + ept.rollupClients[0].MaybeClose() ept.rollupClients[1].ExpectSequencerActive(true, nil) ept.rollupClients[1].ExpectSequencerActive(true, nil) // see comment in other tests about why we expect this twice @@ -502,7 +502,7 @@ func TestRollupProvider_FailOnAllInactiveSequencers(t *testing.T) { // All sequencers are inactive for _, sequencer := range ept.rollupClients { sequencer.ExpectSequencerActive(false, nil) - sequencer.ExpectClose() + sequencer.MaybeClose() } _, err := ept.newActiveL2RollupProvider(0) @@ -518,7 +518,7 @@ func TestEndpointProvider_FailOnAllInactiveSequencers(t *testing.T) { // All sequencers are inactive for _, sequencer := range ept.rollupClients { sequencer.ExpectSequencerActive(false, nil) - sequencer.ExpectClose() + sequencer.MaybeClose() } _, err := ept.newActiveL2EndpointProvider(0) @@ -534,7 +534,7 @@ func TestRollupProvider_FailOnAllErroredSequencers(t *testing.T) { // All sequencers are inactive for _, sequencer := range ept.rollupClients { sequencer.ExpectSequencerActive(true, fmt.Errorf("a test error")) - sequencer.ExpectClose() + sequencer.MaybeClose() } _, err := ept.newActiveL2RollupProvider(0) @@ -550,7 +550,7 @@ func TestEndpointProvider_FailOnAllErroredSequencers(t *testing.T) { // All sequencers are inactive for _, sequencer := range ept.rollupClients { sequencer.ExpectSequencerActive(true, fmt.Errorf("a test error")) - sequencer.ExpectClose() + sequencer.MaybeClose() } _, err := ept.newActiveL2EndpointProvider(0) @@ -614,7 +614,7 @@ func TestRollupProvider_ErrorWhenAllSequencersInactive(t *testing.T) { // All sequencers become inactive for _, sequencer := range ept.rollupClients { sequencer.ExpectSequencerActive(false, nil) - sequencer.ExpectClose() + sequencer.MaybeClose() } _, err = rollupProvider.RollupClient(context.Background()) @@ -635,7 +635,7 @@ func TestEndpointProvider_ErrorWhenAllSequencersInactive(t *testing.T) { // All sequencers become inactive for _, sequencer := range ept.rollupClients { sequencer.ExpectSequencerActive(false, nil) - sequencer.ExpectClose() + sequencer.MaybeClose() } _, err = endpointProvider.EthClient(context.Background()) @@ -712,11 +712,11 @@ func TestRollupProvider_HandlesManyIndexClientMismatch(t *testing.T) { // primarySequencer goes down seq0.ExpectSequencerActive(false, fmt.Errorf("I'm offline now")) - seq0.ExpectClose() + seq0.MaybeClose() ept.setRollupDialOutcome(0, false) // primarySequencer fails to dial // secondarySequencer is inactive, but online seq1.ExpectSequencerActive(false, nil) - seq1.ExpectClose() + seq1.MaybeClose() seq1.ExpectSequencerActive(false, nil) // a non-buggy impl shouldn't need this line. // tertiarySequencer can't even be dialed ept.setRollupDialOutcome(2, false) diff --git a/op-service/dial/active_rollup_provider.go b/op-service/dial/active_rollup_provider.go index d982559c499c..76f30b3da99a 100644 --- a/op-service/dial/active_rollup_provider.go +++ b/op-service/dial/active_rollup_provider.go @@ -128,14 +128,6 @@ func (p *ActiveL2RollupProvider) findActiveEndpoints(ctx context.Context) error p.log.Info("Sequencer inactive, trying next.", "index", idx, "url", ep) } } - - // TODO: We exhausted the list of sequencers, close and remove latest client? - // It definitely makes all the existing tests pass, but maybe we remove - // this and adjust the tests, if it makes sense. - if p.currentRollupClient != nil { - p.currentRollupClient.Close() - p.currentRollupClient = nil - } return fmt.Errorf("failed to find an active sequencer, tried following urls: %v; errs: %w", p.rollupUrls, errs) } diff --git a/op-service/testutils/mock_eth_client.go b/op-service/testutils/mock_eth_client.go index 8c2ef6f3015c..1ca1c953a497 100644 --- a/op-service/testutils/mock_eth_client.go +++ b/op-service/testutils/mock_eth_client.go @@ -143,6 +143,10 @@ func (m *MockEthClient) ExpectClose() { m.Mock.On("Close").Once() } +func (m *MockEthClient) MaybeClose() { + m.Mock.On("Close").Maybe() +} + func (m *MockEthClient) Close() { m.Mock.Called() } diff --git a/op-service/testutils/mock_rollup_client.go b/op-service/testutils/mock_rollup_client.go index 637799b9d325..a7a42e878481 100644 --- a/op-service/testutils/mock_rollup_client.go +++ b/op-service/testutils/mock_rollup_client.go @@ -63,7 +63,7 @@ func (m *MockRollupClient) ExpectClose() { } func (m *MockRollupClient) MaybeClose() { - m.Mock.On("Close").Once().Maybe() + m.Mock.On("Close").Maybe() } func (m *MockRollupClient) Close() { From c7bbd5fc296fcfc48995d26fae0e61682b028b55 Mon Sep 17 00:00:00 2001 From: EvanJRichard Date: Tue, 19 Dec 2023 10:27:54 -0500 Subject: [PATCH 39/41] add a missing call to assertAllExpectations. --- op-service/dial/active_l2_provider_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/op-service/dial/active_l2_provider_test.go b/op-service/dial/active_l2_provider_test.go index 90cfd9bc751a..102ca017f949 100644 --- a/op-service/dial/active_l2_provider_test.go +++ b/op-service/dial/active_l2_provider_test.go @@ -717,14 +717,13 @@ func TestRollupProvider_HandlesManyIndexClientMismatch(t *testing.T) { // secondarySequencer is inactive, but online seq1.ExpectSequencerActive(false, nil) seq1.MaybeClose() - seq1.ExpectSequencerActive(false, nil) // a non-buggy impl shouldn't need this line. // tertiarySequencer can't even be dialed ept.setRollupDialOutcome(2, false) // after calling RollupClient, index will be set to 0, but currentRollupClient is secondarySequencer rollupClient, err := rollupProvider.RollupClient(context.Background()) require.Error(t, err) require.Nil(t, rollupClient) - // internal state is now inconsistent in the buggy impl. + // internal state would now be inconsistent in the buggy impl. // now seq0 is dialable and active ept.setRollupDialOutcome(0, true) @@ -741,4 +740,5 @@ func TestRollupProvider_HandlesManyIndexClientMismatch(t *testing.T) { rollupClient, err = rollupProvider.RollupClient(context.Background()) require.NoError(t, err) require.Same(t, seq0, rollupClient) + ept.assertAllExpectations(t) } From 81befc86876a6a0cc95ad78249d90c6f79c0278c Mon Sep 17 00:00:00 2001 From: EvanJRichard Date: Tue, 19 Dec 2023 14:33:54 -0500 Subject: [PATCH 40/41] Test even the case where the active providers are managing a list of 1 element. --- op-service/dial/active_l2_provider.go | 16 +++++-- op-service/dial/active_l2_provider_test.go | 56 ++++++++++++++++++++-- 2 files changed, 64 insertions(+), 8 deletions(-) diff --git a/op-service/dial/active_l2_provider.go b/op-service/dial/active_l2_provider.go index 5eb3fadbc050..7b22b54ab6f2 100644 --- a/op-service/dial/active_l2_provider.go +++ b/op-service/dial/active_l2_provider.go @@ -81,13 +81,17 @@ func newActiveL2EndpointProvider( func (p *ActiveL2EndpointProvider) EthClient(ctx context.Context) (EthClientInterface, error) { p.clientLock.Lock() - defer p.clientLock.Unlock() err := p.ensureActiveEndpoint(ctx) if err != nil { + p.clientLock.Unlock() return nil, err } - if p.ethClientIndex != p.rollupIndex || p.currentEthClient == nil { - // we changed sequencers, dial a new EthClient + currentClient := p.currentEthClient + shouldDial := p.ethClientIndex != p.rollupIndex || currentClient == nil + p.clientLock.Unlock() + + if shouldDial { + // Dialing logic outside of the lock cctx, cancel := context.WithTimeout(ctx, p.networkTimeout) defer cancel() idx := p.rollupIndex @@ -97,11 +101,13 @@ func (p *ActiveL2EndpointProvider) EthClient(ctx context.Context) (EthClientInte if err != nil { return nil, fmt.Errorf("dialing eth client: %w", err) } - if p.currentEthClient != nil { - p.currentEthClient.Close() + if currentClient != nil { + currentClient.Close() } + p.clientLock.Lock() p.ethClientIndex = idx p.currentEthClient = ethClient + p.clientLock.Unlock() } return p.currentEthClient, nil } diff --git a/op-service/dial/active_l2_provider_test.go b/op-service/dial/active_l2_provider_test.go index 102ca017f949..5790c3dbd71c 100644 --- a/op-service/dial/active_l2_provider_test.go +++ b/op-service/dial/active_l2_provider_test.go @@ -719,11 +719,14 @@ func TestRollupProvider_HandlesManyIndexClientMismatch(t *testing.T) { seq1.MaybeClose() // tertiarySequencer can't even be dialed ept.setRollupDialOutcome(2, false) - // after calling RollupClient, index will be set to 0, but currentRollupClient is secondarySequencer + // In a prior buggy implementation, this scenario lead to an internal inconsistent state + // where the current client didn't match the index. On a subsequent try, this led to the + // active sequencer at 0 to be skipped entirely, while the sequencer at index 1 + // was checked twice. rollupClient, err := rollupProvider.RollupClient(context.Background()) require.Error(t, err) require.Nil(t, rollupClient) - // internal state would now be inconsistent in the buggy impl. + // internal state would now be inconsistent in a buggy impl. // now seq0 is dialable and active ept.setRollupDialOutcome(0, true) @@ -736,9 +739,56 @@ func TestRollupProvider_HandlesManyIndexClientMismatch(t *testing.T) { ept.setRollupDialOutcome(2, true) seq2.ExpectSequencerActive(false, nil) seq2.MaybeClose() - // now trigger the bug by calling for the client + // this would trigger the prior bug: request the rollup client. rollupClient, err = rollupProvider.RollupClient(context.Background()) require.NoError(t, err) require.Same(t, seq0, rollupClient) ept.assertAllExpectations(t) } + +// TestRollupProvider_HandlesSingleSequencer verifies that the ActiveL2RollupProvider +// can handle being passed a single sequencer endpoint without issue. +func TestRollupProvider_HandlesSingleSequencer(t *testing.T) { + ept := setupEndpointProviderTest(t, 1) + onlySequencer := ept.rollupClients[0] + onlySequencer.ExpectSequencerActive(true, nil) // respond true once on creation + + rollupProvider, err := ept.newActiveL2RollupProvider(0) + require.NoError(t, err) + + onlySequencer.ExpectSequencerActive(true, nil) // respond true again when the test calls `RollupClient()` the first time + firstSequencerUsed, err := rollupProvider.RollupClient(context.Background()) + require.NoError(t, err) + require.Same(t, onlySequencer, firstSequencerUsed) + + onlySequencer.ExpectSequencerActive(false, nil) // become inactive after that + onlySequencer.MaybeClose() + secondSequencerUsed, err := rollupProvider.RollupClient(context.Background()) + require.Error(t, err) + require.Nil(t, secondSequencerUsed) + ept.assertAllExpectations(t) +} + +// TestEndpointProvider_HandlesSingleSequencer verifies that the ActiveL2EndpointProvider +// can handle being passed a single sequencer endpoint without issue. +func TestEndpointProvider_HandlesSingleSequencer(t *testing.T) { + ept := setupEndpointProviderTest(t, 1) + onlySequencer := ept.rollupClients[0] + onlySequencer.ExpectSequencerActive(true, nil) // respond true once on creation + onlySequencer.ExpectSequencerActive(true, nil) // respond true again when the constructor calls `RollupClient()` + + endpointProvider, err := ept.newActiveL2EndpointProvider(0) + require.NoError(t, err) + + onlySequencer.ExpectSequencerActive(true, nil) // respond true a once more on fall-through check in `EthClient()` + firstEthClientUsed, err := endpointProvider.EthClient(context.Background()) + require.NoError(t, err) + require.Same(t, ept.ethClients[0], firstEthClientUsed) + + onlySequencer.ExpectSequencerActive(false, nil) // become inactive after that + onlySequencer.MaybeClose() + secondEthClientUsed, err := endpointProvider.EthClient(context.Background()) + require.Error(t, err) + require.Nil(t, secondEthClientUsed) + ept.assertAllExpectations(t) +} From be8ad6ace51813ce6b3c66e7a33ab885528b3ef3 Mon Sep 17 00:00:00 2001 From: EvanJRichard Date: Tue, 19 Dec 2023 16:38:58 -0500 Subject: [PATCH 41/41] Revert experimental hunk in active_l2_provider. --- op-service/dial/active_l2_provider.go | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/op-service/dial/active_l2_provider.go b/op-service/dial/active_l2_provider.go index 7b22b54ab6f2..5eb3fadbc050 100644 --- a/op-service/dial/active_l2_provider.go +++ b/op-service/dial/active_l2_provider.go @@ -81,17 +81,13 @@ func newActiveL2EndpointProvider( func (p *ActiveL2EndpointProvider) EthClient(ctx context.Context) (EthClientInterface, error) { p.clientLock.Lock() + defer p.clientLock.Unlock() err := p.ensureActiveEndpoint(ctx) if err != nil { - p.clientLock.Unlock() return nil, err } - currentClient := p.currentEthClient - shouldDial := p.ethClientIndex != p.rollupIndex || currentClient == nil - p.clientLock.Unlock() - - if shouldDial { - // Dialing logic outside of the lock + if p.ethClientIndex != p.rollupIndex || p.currentEthClient == nil { + // we changed sequencers, dial a new EthClient cctx, cancel := context.WithTimeout(ctx, p.networkTimeout) defer cancel() idx := p.rollupIndex @@ -101,13 +97,11 @@ func (p *ActiveL2EndpointProvider) EthClient(ctx context.Context) (EthClientInte if err != nil { return nil, fmt.Errorf("dialing eth client: %w", err) } - if currentClient != nil { - currentClient.Close() + if p.currentEthClient != nil { + p.currentEthClient.Close() } - p.clientLock.Lock() p.ethClientIndex = idx p.currentEthClient = ethClient - p.clientLock.Unlock() } return p.currentEthClient, nil }