Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
b96ef75
Initial working impl
Fletch153 Jul 15, 2025
5e1f6d9
update naming from metric to status
Fletch153 Jul 15, 2025
9feaab9
Fixes + update tests
Fletch153 Jul 16, 2025
a9e36ad
Small fixes
Fletch153 Jul 17, 2025
93bd516
Migrate JSON beholder msg to protobuf
Fletch153 Jul 18, 2025
60cc0d0
Add job idenfication to polling
Fletch153 Jul 18, 2025
b20257d
Include external job IDs
Fletch153 Jul 18, 2025
ae55487
Add job name to beholder output
Fletch153 Jul 22, 2025
1dc1dc6
Rename ea status to bridge status
Fletch153 Jul 22, 2025
da649bd
Fixed issue with protobuf marshaling
Fletch153 Jul 23, 2025
208fec0
Fixed issues with beholder not correctly emitting
Fletch153 Jul 23, 2025
31b9c0e
Fixed issue sending nil values
Fletch153 Jul 23, 2025
b348d5f
add go generate d.
Fletch153 Jul 24, 2025
79a5f6e
Add README
Fletch153 Jul 24, 2025
60ff386
Changeset
Fletch153 Jul 24, 2025
3c89b47
Merge branch 'develop' into feature/DF-21286/add_additional_telemetry
Fletch153 Jul 24, 2025
877c29e
Fix build issues
Fletch153 Jul 25, 2025
08daf07
go gen
Fletch153 Jul 25, 2025
5664d0d
Fixed PR check issues
Fletch153 Jul 25, 2025
9c7c358
Update test fixtures
Fletch153 Jul 25, 2025
608aa67
Additional test fixes
Fletch153 Jul 25, 2025
89d3650
PR fixes
Fletch153 Jul 28, 2025
73be6f5
Fix racey tests
Fletch153 Jul 28, 2025
0ce4327
Fix test failure
Fletch153 Jul 28, 2025
1a4af80
Improved configuration reslilience
Fletch153 Jul 29, 2025
2a16e57
Merge branch 'develop' into feature/DF-21286/add_additional_telemetry
Fletch153 Jul 29, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/floppy-parts-argue.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"chainlink": minor
---

Added Bridge Status Reporter Service that polls external adapter status endpoints and emits telemetry events for operational monitoring #nops #added
1 change: 1 addition & 0 deletions core/config/app_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ type AppConfig interface {
Telemetry() Telemetry
CRE() CRE
Billing() Billing
BridgeStatusReporter() BridgeStatusReporter
}

type DatabaseBackupMode string
Expand Down
13 changes: 13 additions & 0 deletions core/config/bridge_status_config.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package config

import "time"

// MinimumPollingInterval is the lower bound for the Bridge Status Reporter's
// PollingInterval. TOML config validation rejects an enabled reporter whose
// interval is shorter than this.
const MinimumPollingInterval = time.Minute

// BridgeStatusReporter exposes the settings of the Bridge Status Reporter
// service, which polls bridge status endpoints.
type BridgeStatusReporter interface {
	// Enabled reports whether the Bridge Status Reporter service is turned on.
	Enabled() bool
	// StatusPath is the path appended to bridge URLs for status polling.
	StatusPath() string
	// PollingInterval is how often bridge status endpoints are polled.
	PollingInterval() time.Duration
	// IgnoreInvalidBridges reports whether bridges that return HTTP errors or
	// invalid responses are skipped.
	IgnoreInvalidBridges() bool
	// IgnoreJoblessBridges reports whether bridges with no associated jobs are
	// skipped.
	IgnoreJoblessBridges() bool
}
13 changes: 13 additions & 0 deletions core/config/docs/core.toml
Original file line number Diff line number Diff line change
Expand Up @@ -827,3 +827,16 @@ URL = '' # Default
URL = "localhost:4319" # Default
# TLSEnabled enables TLS to be used to secure communication with the billing service. This is enabled by default.
TLSEnabled = true # Default

# BridgeStatusReporter holds settings for the Bridge Status Reporter service.
[BridgeStatusReporter]
# Enabled enables the Bridge Status Reporter service that polls bridge status endpoints.
Enabled = false # Default
# StatusPath is the path to append to bridge URLs for status polling.
StatusPath = "/status" # Default
# PollingInterval is how often to poll bridge status endpoints. Must be at least 1m when the reporter is enabled.
PollingInterval = "5m" # Default
# IgnoreInvalidBridges skips bridges that return HTTP errors or invalid responses.
IgnoreInvalidBridges = true # Default
# IgnoreJoblessBridges skips bridges that have no associated jobs.
IgnoreJoblessBridges = false # Default
108 changes: 84 additions & 24 deletions core/config/toml/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,30 +39,31 @@ type Core struct {
RootDir *string
ShutdownGracePeriod *commonconfig.Duration

Feature Feature `toml:",omitempty"`
Database Database `toml:",omitempty"`
TelemetryIngress TelemetryIngress `toml:",omitempty"`
AuditLogger AuditLogger `toml:",omitempty"`
Log Log `toml:",omitempty"`
WebServer WebServer `toml:",omitempty"`
JobDistributor JobDistributor `toml:",omitempty"`
JobPipeline JobPipeline `toml:",omitempty"`
FluxMonitor FluxMonitor `toml:",omitempty"`
OCR2 OCR2 `toml:",omitempty"`
OCR OCR `toml:",omitempty"`
P2P P2P `toml:",omitempty"`
Keeper Keeper `toml:",omitempty"`
AutoPprof AutoPprof `toml:",omitempty"`
Pyroscope Pyroscope `toml:",omitempty"`
Sentry Sentry `toml:",omitempty"`
Insecure Insecure `toml:",omitempty"`
Tracing Tracing `toml:",omitempty"`
Mercury Mercury `toml:",omitempty"`
Capabilities Capabilities `toml:",omitempty"`
Telemetry Telemetry `toml:",omitempty"`
Workflows Workflows `toml:",omitempty"`
CRE CreConfig `toml:",omitempty"`
Billing Billing `toml:",omitempty"`
Feature Feature `toml:",omitempty"`
Database Database `toml:",omitempty"`
TelemetryIngress TelemetryIngress `toml:",omitempty"`
AuditLogger AuditLogger `toml:",omitempty"`
Log Log `toml:",omitempty"`
WebServer WebServer `toml:",omitempty"`
JobDistributor JobDistributor `toml:",omitempty"`
JobPipeline JobPipeline `toml:",omitempty"`
FluxMonitor FluxMonitor `toml:",omitempty"`
OCR2 OCR2 `toml:",omitempty"`
OCR OCR `toml:",omitempty"`
P2P P2P `toml:",omitempty"`
Keeper Keeper `toml:",omitempty"`
AutoPprof AutoPprof `toml:",omitempty"`
Pyroscope Pyroscope `toml:",omitempty"`
Sentry Sentry `toml:",omitempty"`
Insecure Insecure `toml:",omitempty"`
Tracing Tracing `toml:",omitempty"`
Mercury Mercury `toml:",omitempty"`
Capabilities Capabilities `toml:",omitempty"`
Telemetry Telemetry `toml:",omitempty"`
Workflows Workflows `toml:",omitempty"`
CRE CreConfig `toml:",omitempty"`
Billing Billing `toml:",omitempty"`
BridgeStatusReporter BridgeStatusReporter `toml:",omitempty"`
}

// SetFrom updates c with any non-nil values from f. (currently TOML field only!)
Expand Down Expand Up @@ -107,6 +108,7 @@ func (c *Core) SetFrom(f *Core) {
c.Telemetry.setFrom(&f.Telemetry)
c.CRE.setFrom(&f.CRE)
c.Billing.setFrom(&f.Billing)
c.BridgeStatusReporter.setFrom(&f.BridgeStatusReporter)
}

func (c *Core) ValidateConfig() (err error) {
Expand Down Expand Up @@ -2273,6 +2275,64 @@ func (b *Billing) ValidateConfig() error {
return nil
}

// BridgeStatusReporter is the TOML form of the [BridgeStatusReporter] section.
// Every field is a pointer so that an unset value (nil) can be told apart from
// an explicitly configured one when layering configs with setFrom.
type BridgeStatusReporter struct {
	// Enabled turns on the service that polls bridge status endpoints.
	Enabled *bool
	// StatusPath is the path to append to bridge URLs for status polling.
	StatusPath *string
	// PollingInterval is how often bridge status endpoints are polled.
	PollingInterval *commonconfig.Duration
	// IgnoreInvalidBridges skips bridges that return HTTP errors or invalid responses.
	IgnoreInvalidBridges *bool
	// IgnoreJoblessBridges skips bridges that have no associated jobs.
	IgnoreJoblessBridges *bool
}

// setFrom overlays f onto e: each field that is non-nil in f replaces the
// corresponding field in e, while nil fields in f leave e untouched.
func (e *BridgeStatusReporter) setFrom(f *BridgeStatusReporter) {
	if v := f.Enabled; v != nil {
		e.Enabled = v
	}
	if v := f.StatusPath; v != nil {
		e.StatusPath = v
	}
	if v := f.PollingInterval; v != nil {
		e.PollingInterval = v
	}
	if v := f.IgnoreInvalidBridges; v != nil {
		e.IgnoreInvalidBridges = v
	}
	if v := f.IgnoreJoblessBridges; v != nil {
		e.IgnoreJoblessBridges = v
	}
}

// ValidateConfig validates the section and, when the reporter is enabled,
// fills in defaults for optional fields that were left unset.
//
// A nil or false Enabled returns nil immediately without touching any field.
// Otherwise:
//   - StatusPath falls back to "/status" when nil or empty;
//   - PollingInterval must be set and be at least config.MinimumPollingInterval;
//   - IgnoreInvalidBridges defaults to true and IgnoreJoblessBridges to false.
//
// The StatusPath default is applied before PollingInterval is checked, so it
// remains in place even when an error is returned.
func (e *BridgeStatusReporter) ValidateConfig() error {
	if enabled := e.Enabled != nil && *e.Enabled; !enabled {
		return nil
	}

	// Default values when enabled
	if e.StatusPath == nil || *e.StatusPath == "" {
		statusPath := "/status"
		e.StatusPath = &statusPath
	}

	switch interval := e.PollingInterval; {
	case interval == nil:
		return configutils.ErrInvalid{Name: "PollingInterval", Value: nil, Msg: "must be set"}
	case interval.Duration() < config.MinimumPollingInterval:
		return configutils.ErrInvalid{
			Name:  "PollingInterval",
			Value: interval.Duration(),
			Msg:   "must be greater than or equal to: " + config.MinimumPollingInterval.String(),
		}
	}

	if e.IgnoreInvalidBridges == nil {
		ignoreInvalid := true
		e.IgnoreInvalidBridges = &ignoreInvalid
	}
	if e.IgnoreJoblessBridges == nil {
		ignoreJobless := false
		e.IgnoreJoblessBridges = &ignoreJobless
	}

	return nil
}

// JobDistributor is the TOML form of the JobDistributor section of Core.
type JobDistributor struct {
	// DisplayName is optional; nil means it was not set.
	DisplayName *string
}
Expand Down
164 changes: 164 additions & 0 deletions core/config/toml/types_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"net/url"
"strings"
"testing"
"time"

"github.com/pelletier/go-toml/v2"
"github.com/stretchr/testify/assert"
Expand Down Expand Up @@ -636,3 +637,166 @@ func TestEthKeys_SetFrom(t *testing.T) {

// ptr returns a pointer to a copy of v. It is a convenience for filling
// pointer-typed config fields from literals.
func ptr[T any](v T) *T {
	return &v
}

// TestBridgeStatusReporter_ValidateConfig exercises BridgeStatusReporter.ValidateConfig
// across disabled, enabled-valid and enabled-invalid configurations, and checks
// that defaults are applied to unset optional fields when the reporter is enabled.
func TestBridgeStatusReporter_ValidateConfig(t *testing.T) {
	testCases := []struct {
		name        string
		config      *BridgeStatusReporter
		expectError bool
		errorMsg    string
	}{
		// Disabled (or unset) reporter: validation is skipped entirely.
		{
			name: "disabled with nil fields",
			config: &BridgeStatusReporter{
				Enabled:              ptr(false),
				StatusPath:           nil,
				PollingInterval:      nil,
				IgnoreInvalidBridges: nil,
				IgnoreJoblessBridges: nil,
			},
			expectError: false,
		},
		{
			name: "disabled with empty fields",
			config: &BridgeStatusReporter{
				Enabled:              ptr(false),
				StatusPath:           ptr(""),
				PollingInterval:      durationPtr(0),
				IgnoreInvalidBridges: ptr(false),
				IgnoreJoblessBridges: ptr(true),
			},
			expectError: false,
		},
		{
			name: "disabled with valid fields",
			config: &BridgeStatusReporter{
				Enabled:              ptr(false),
				StatusPath:           ptr("/status"),
				PollingInterval:      durationPtr(5 * time.Minute),
				IgnoreInvalidBridges: ptr(true),
				IgnoreJoblessBridges: ptr(false),
			},
			expectError: false,
		},
		{
			name: "nil enabled (defaults to disabled)",
			config: &BridgeStatusReporter{
				Enabled:              nil,
				StatusPath:           ptr("/status"),
				PollingInterval:      durationPtr(5 * time.Minute),
				IgnoreInvalidBridges: ptr(true),
				IgnoreJoblessBridges: ptr(false),
			},
			expectError: false,
		},
		// Enabled reporter: valid configs, auto-defaulting, and validation failures.
		{
			name: "enabled with valid config",
			config: &BridgeStatusReporter{
				Enabled:              ptr(true),
				StatusPath:           ptr("/status"),
				PollingInterval:      durationPtr(5 * time.Minute),
				IgnoreInvalidBridges: ptr(true),
				IgnoreJoblessBridges: ptr(false),
			},
			expectError: false,
		},
		{
			name: "enabled with nil fields - should fail validation",
			config: &BridgeStatusReporter{
				Enabled:              ptr(true),
				StatusPath:           nil,
				PollingInterval:      nil,
				IgnoreInvalidBridges: nil,
				IgnoreJoblessBridges: nil,
			},
			expectError: true,
			errorMsg:    "must be set",
		},
		{
			name: "enabled with empty status path - should auto-default",
			config: &BridgeStatusReporter{
				Enabled:              ptr(true),
				StatusPath:           ptr(""),
				PollingInterval:      durationPtr(5 * time.Minute),
				IgnoreInvalidBridges: ptr(true),
				IgnoreJoblessBridges: ptr(false),
			},
			expectError: false,
		},
		{
			name: "enabled with zero polling interval - should fail validation",
			config: &BridgeStatusReporter{
				Enabled:              ptr(true),
				StatusPath:           ptr("/status"),
				PollingInterval:      durationPtr(0),
				IgnoreInvalidBridges: ptr(true),
				IgnoreJoblessBridges: ptr(false),
			},
			expectError: true,
			errorMsg:    "must be greater than or equal to: 1m",
		},
		{
			name: "enabled with polling interval less than 1 minute - should fail validation",
			config: &BridgeStatusReporter{
				Enabled:              ptr(true),
				StatusPath:           ptr("/status"),
				PollingInterval:      durationPtr(30 * time.Second),
				IgnoreInvalidBridges: ptr(true),
				IgnoreJoblessBridges: ptr(false),
			},
			expectError: true,
			errorMsg:    "must be greater than or equal to: 1m",
		},
		{
			name: "enabled with polling interval exactly 1 minute",
			config: &BridgeStatusReporter{
				Enabled:              ptr(true),
				StatusPath:           ptr("/status"),
				PollingInterval:      durationPtr(1 * time.Minute),
				IgnoreInvalidBridges: ptr(true),
				IgnoreJoblessBridges: ptr(false),
			},
			expectError: false,
		},
		{
			name: "enabled with all fields missing - should fail validation",
			config: &BridgeStatusReporter{
				Enabled:              ptr(true),
				StatusPath:           ptr(""),
				PollingInterval:      durationPtr(0),
				IgnoreInvalidBridges: nil,
				IgnoreJoblessBridges: nil,
			},
			expectError: true,
			errorMsg:    "must be greater than or equal to: 1m",
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			err := tc.config.ValidateConfig()
			if tc.expectError {
				require.Error(t, err)
				assert.Contains(t, err.Error(), tc.errorMsg)
				return
			}
			require.NoError(t, err)

			// Nothing further to verify unless the reporter is enabled.
			if tc.config.Enabled == nil || !*tc.config.Enabled {
				return
			}

			// require (not assert) before dereferencing, so a missing default
			// fails the test cleanly instead of panicking on a nil pointer.
			require.NotNil(t, tc.config.StatusPath)
			assert.NotEmpty(t, *tc.config.StatusPath)
			require.NotNil(t, tc.config.PollingInterval)
			assert.GreaterOrEqual(t, tc.config.PollingInterval.Duration(), time.Minute)
			assert.NotNil(t, tc.config.IgnoreInvalidBridges)
			assert.NotNil(t, tc.config.IgnoreJoblessBridges)
		})
	}

	// The table above never leaves the optional fields nil in a passing case,
	// so cover the defaulting branches explicitly and pin the default values.
	t.Run("enabled with nil optional fields - applies defaults", func(t *testing.T) {
		cfg := &BridgeStatusReporter{
			Enabled:         ptr(true),
			PollingInterval: durationPtr(5 * time.Minute),
		}
		require.NoError(t, cfg.ValidateConfig())

		require.NotNil(t, cfg.StatusPath)
		assert.Equal(t, "/status", *cfg.StatusPath)
		require.NotNil(t, cfg.IgnoreInvalidBridges)
		assert.True(t, *cfg.IgnoreInvalidBridges)
		require.NotNil(t, cfg.IgnoreJoblessBridges)
		assert.False(t, *cfg.IgnoreJoblessBridges)
	})
}

// durationPtr builds a commonconfig.Duration from d and returns a pointer to a
// copy of it, for populating PollingInterval in test fixtures.
func durationPtr(d time.Duration) *commonconfig.Duration {
	return ptr(*commonconfig.MustNewDuration(d))
}
12 changes: 12 additions & 0 deletions core/services/chainlink/application.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"go.uber.org/zap/zapcore"
"google.golang.org/grpc/credentials"

"github.com/smartcontractkit/chainlink-common/pkg/beholder"
"github.com/smartcontractkit/chainlink-common/pkg/billing"
"github.com/smartcontractkit/chainlink-common/pkg/custmsg"
"github.com/smartcontractkit/chainlink-common/pkg/loop"
Expand Down Expand Up @@ -64,6 +65,7 @@ import (
"github.com/smartcontractkit/chainlink/v2/core/services/keeper"
"github.com/smartcontractkit/chainlink/v2/core/services/keystore"
"github.com/smartcontractkit/chainlink/v2/core/services/llo/retirement"
"github.com/smartcontractkit/chainlink/v2/core/services/nodestatusreporter/bridgestatus"
"github.com/smartcontractkit/chainlink/v2/core/services/ocr"
"github.com/smartcontractkit/chainlink/v2/core/services/ocr2"
"github.com/smartcontractkit/chainlink/v2/core/services/ocrbootstrap"
Expand Down Expand Up @@ -649,6 +651,16 @@ func NewApplication(ctx context.Context, opts ApplicationOpts) (Application, err
globalLogger.Debug("Off-chain reporting v2 disabled")
}

bridgeStatusReporter := bridgestatus.NewBridgeStatusReporter(
cfg.BridgeStatusReporter(),
bridgeORM,
jobORM,
unrestrictedHTTPClient,
beholder.GetEmitter(),
globalLogger,
)
srvcs = append(srvcs, bridgeStatusReporter)

healthChecker := commonservices.NewChecker(static.Version, static.Sha)

var lbs []utils.DependentAwaiter
Expand Down
Loading
Loading