Skip to content

Commit

Permalink
Address code review comments III
Browse files Browse the repository at this point in the history
Signed-off-by: Yacov Manevich <yacov.manevich@avalabs.org>
  • Loading branch information
yacovm committed Mar 2, 2025
1 parent db2b37f commit 0873221
Show file tree
Hide file tree
Showing 13 changed files with 203 additions and 319 deletions.
17 changes: 9 additions & 8 deletions config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -327,13 +327,14 @@ func getNetworkConfig(
},

HealthConfig: network.HealthConfig{
Enabled: sybilProtectionEnabled,
MaxTimeSinceMsgSent: v.GetDuration(NetworkHealthMaxTimeSinceMsgSentKey),
MaxTimeSinceMsgReceived: v.GetDuration(NetworkHealthMaxTimeSinceMsgReceivedKey),
MaxPortionSendQueueBytesFull: v.GetFloat64(NetworkHealthMaxPortionSendQueueFillKey),
MinConnectedPeers: v.GetUint(NetworkHealthMinPeersKey),
MaxSendFailRate: v.GetFloat64(NetworkHealthMaxSendFailRateKey),
SendFailRateHalflife: halflife,
Enabled: sybilProtectionEnabled,
MaxTimeSinceMsgSent: v.GetDuration(NetworkHealthMaxTimeSinceMsgSentKey),
MaxTimeSinceMsgReceived: v.GetDuration(NetworkHealthMaxTimeSinceMsgReceivedKey),
MaxPortionSendQueueBytesFull: v.GetFloat64(NetworkHealthMaxPortionSendQueueFillKey),
MinConnectedPeers: v.GetUint(NetworkHealthMinPeersKey),
MaxSendFailRate: v.GetFloat64(NetworkHealthMaxSendFailRateKey),
SendFailRateHalflife: halflife,
AllowNoIngressValidatorConnections: v.GetBool(NetworkAllowNoIngressValidatorConnectionKey),
},

ProxyEnabled: v.GetBool(NetworkTCPProxyEnabledKey),
Expand Down Expand Up @@ -1431,7 +1432,7 @@ func GetNodeConfig(v *viper.Viper) (node.Config, error) {
nodeConfig.SystemTrackerProcessingHalflife = v.GetDuration(SystemTrackerProcessingHalflifeKey)
nodeConfig.SystemTrackerCPUHalflife = v.GetDuration(SystemTrackerCPUHalflifeKey)
nodeConfig.SystemTrackerDiskHalflife = v.GetDuration(SystemTrackerDiskHalflifeKey)
nodeConfig.NoIngressValidatorConnectionTimeout = v.GetDuration(NoIngressValidatorConnectionTimeoutKey)
nodeConfig.NoIngressValidatorConnection = v.GetBool(NetworkAllowNoIngressValidatorConnectionKey)

nodeConfig.RequiredAvailableDiskSpace, nodeConfig.WarningThresholdAvailableDiskSpace, err = getDiskSpaceConfig(v)
if err != nil {
Expand Down
2 changes: 1 addition & 1 deletion config/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ func addNodeFlags(fs *pflag.FlagSet) {
fs.Duration(NetworkReadHandshakeTimeoutKey, constants.DefaultNetworkReadHandshakeTimeout, "Timeout value for reading handshake messages")
fs.Duration(NetworkPingTimeoutKey, constants.DefaultPingPongTimeout, "Timeout value for Ping-Pong with a peer")
fs.Duration(NetworkPingFrequencyKey, constants.DefaultPingFrequency, "Frequency of pinging other peers")
fs.Duration(NoIngressValidatorConnectionTimeoutKey, constants.DefaultNoIngressValidatorConnectionTimeout, "Time after which nodes are expected to be connected to us if we are a primary network validator, otherwise a health check fails")
fs.Duration(NetworkAllowNoIngressValidatorConnectionKey, constants.DefaultNoIngressValidatorConnectionTimeout, "Time after which nodes are expected to be connected to us if we are a primary network validator, otherwise a health check fails")
fs.String(NetworkCompressionTypeKey, constants.DefaultNetworkCompressionType.String(), fmt.Sprintf("Compression type for outbound messages. Must be one of [%s, %s]", compression.TypeZstd, compression.TypeNone))

fs.Duration(NetworkMaxClockDifferenceKey, constants.DefaultNetworkMaxClockDifference, "Max allowed clock difference value between this node and peers")
Expand Down
2 changes: 1 addition & 1 deletion config/keys.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ const (
NetworkInboundThrottlerMaxConnsPerSecKey = "network-inbound-connection-throttling-max-conns-per-sec"
NetworkOutboundConnectionThrottlingRpsKey = "network-outbound-connection-throttling-rps"
NetworkOutboundConnectionTimeoutKey = "network-outbound-connection-timeout"
NetworkAllowNoIngressValidatorConnectionKey = "network-allow-no-ingress-validator-connection"
BenchlistFailThresholdKey = "benchlist-fail-threshold"
BenchlistDurationKey = "benchlist-duration"
BenchlistMinFailingDurationKey = "benchlist-min-failing-duration"
Expand Down Expand Up @@ -190,7 +191,6 @@ const (
SystemTrackerDiskHalflifeKey = "system-tracker-disk-halflife"
SystemTrackerRequiredAvailableDiskSpaceKey = "system-tracker-disk-required-available-space"
SystemTrackerWarningThresholdAvailableDiskSpaceKey = "system-tracker-disk-warning-threshold-available-space"
NoIngressValidatorConnectionTimeoutKey = "no-ingress-validator-connection-timeout"
DiskVdrAllocKey = "throttler-inbound-disk-validator-alloc"
DiskMaxNonVdrUsageKey = "throttler-inbound-disk-max-non-validator-usage"
DiskMaxNonVdrNodeUsageKey = "throttler-inbound-disk-max-non-validator-node-usage"
Expand Down
6 changes: 3 additions & 3 deletions config/node/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -196,9 +196,9 @@ type Config struct {
// Larger halflife --> disk usage metrics change more slowly.
SystemTrackerDiskHalflife time.Duration `json:"systemTrackerDiskHalflife"`

// NoIngressValidatorConnectionTimeout denotes the time after which at least one
// node is expected to be connected to us if we validate the primary network, otherwise a health check fails.
NoIngressValidatorConnectionTimeout time.Duration `json:"noIngressValidatorConnectionTimeout"`
// NoIngressValidatorConnection makes the health check fail if the node is a primary network validator
// and no node has opened a connection to it.
NoIngressValidatorConnection bool `json:"noIngressValidatorConnectionTimeout"`

CPUTargeterConfig tracker.TargeterConfig `json:"cpuTargeterConfig"`

Expand Down
4 changes: 4 additions & 0 deletions network/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ type HealthConfig struct {
// Marks if the health check should be enabled
Enabled bool `json:"-"`

// AllowNoIngressValidatorConnections denotes whether the health check does not fail
// for primary network validators with no ingress connections.
AllowNoIngressValidatorConnections bool

// MinConnectedPeers is the minimum number of peers that the network should
// be connected to be considered healthy.
MinConnectedPeers uint `json:"minConnectedPeers"`
Expand Down
72 changes: 41 additions & 31 deletions network/network.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,11 @@ import (
)

const (
ConnectedPeersKey = "connectedPeers"
TimeSinceLastMsgReceivedKey = "timeSinceLastMsgReceived"
TimeSinceLastMsgSentKey = "timeSinceLastMsgSent"
SendFailRateKey = "sendFailRate"
PrimaryNetworkValidatorHealthKey = "primary network validator health"
ConnectedPeersKey = "connectedPeers"
TimeSinceLastMsgReceivedKey = "timeSinceLastMsgReceived"
TimeSinceLastMsgSentKey = "timeSinceLastMsgSent"
SendFailRateKey = "sendFailRate"
)

var (
Expand Down Expand Up @@ -156,6 +157,8 @@ type network struct {
connectedPeers peer.Set
closing bool

ingressConnAlerter noIngressConnAlert

// router is notified about all peer [Connected] and [Disconnected] events
// as well as all non-handshake peer messages.
//
Expand Down Expand Up @@ -259,34 +262,29 @@ func NewNetwork(
ipTracker.ManuallyTrack(nodeID)
}

// ingressConnCount tracks connections to all remote peers,
// so for safety we set it as pointer, in case the config object is copied.
var ingressConnCount atomic.Uint64

peerConfig := &peer.Config{
ReadBufferSize: config.PeerReadBufferSize,
WriteBufferSize: config.PeerWriteBufferSize,
Metrics: peerMetrics,
MessageCreator: msgCreator,
IngressConnectionCount: &ingressConnCount,
Log: log,
InboundMsgThrottler: inboundMsgThrottler,
Network: nil, // This is set below.
Router: router,
VersionCompatibility: version.GetCompatibility(minCompatibleTime),
MyNodeID: config.MyNodeID,
MySubnets: config.TrackedSubnets,
Beacons: config.Beacons,
Validators: config.Validators,
NetworkID: config.NetworkID,
PingFrequency: config.PingFrequency,
PongTimeout: config.PingPongTimeout,
MaxClockDifference: config.MaxClockDifference,
SupportedACPs: config.SupportedACPs.List(),
ObjectedACPs: config.ObjectedACPs.List(),
ResourceTracker: config.ResourceTracker,
UptimeCalculator: config.UptimeCalculator,
IPSigner: peer.NewIPSigner(config.MyIPPort, config.TLSKey, config.BLSKey),
ReadBufferSize: config.PeerReadBufferSize,
WriteBufferSize: config.PeerWriteBufferSize,
Metrics: peerMetrics,
MessageCreator: msgCreator,
Log: log,
InboundMsgThrottler: inboundMsgThrottler,
Network: nil, // This is set below.
Router: router,
VersionCompatibility: version.GetCompatibility(minCompatibleTime),
MyNodeID: config.MyNodeID,
MySubnets: config.TrackedSubnets,
Beacons: config.Beacons,
Validators: config.Validators,
NetworkID: config.NetworkID,
PingFrequency: config.PingFrequency,
PongTimeout: config.PingPongTimeout,
MaxClockDifference: config.MaxClockDifference,
SupportedACPs: config.SupportedACPs.List(),
ObjectedACPs: config.ObjectedACPs.List(),
ResourceTracker: config.ResourceTracker,
UptimeCalculator: config.UptimeCalculator,
IPSigner: peer.NewIPSigner(config.MyIPPort, config.TLSKey, config.BLSKey),
}

onCloseCtx, cancel := context.WithCancel(context.Background())
Expand Down Expand Up @@ -318,6 +316,11 @@ func NewNetwork(
router: router,
}
n.peerConfig.Network = n
n.ingressConnAlerter = noIngressConnAlert{
ingressConnections: n,
validators: config.Validators,
selfID: config.MyNodeID,
}
return n, nil
}

Expand Down Expand Up @@ -408,6 +411,13 @@ func (n *network) HealthCheck(context.Context) (interface{}, error) {
details[SendFailRateKey] = sendFailRate
n.metrics.sendFailRate.Set(sendFailRate)

// Make sure if we're a primary network validator, we have ingress connections
if !n.config.AllowNoIngressValidatorConnections {
connectedPrimaryValidatorInfo, isConnectedPrimaryValidatorErr := n.ingressConnAlerter.checkHealth()
healthy = healthy && isConnectedPrimaryValidatorErr == nil
details[PrimaryNetworkValidatorHealthKey] = connectedPrimaryValidatorInfo
}

// emit metrics about the lifetime of peer connections
n.metrics.updatePeerConnectionLifetimeMetrics()

Expand Down
48 changes: 48 additions & 0 deletions network/no_ingress_conn_alert.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved.
// See the file LICENSE for licensing terms.

package network

import (
"errors"

"github.com/ava-labs/avalanchego/ids"
"github.com/ava-labs/avalanchego/snow/validators"
"github.com/ava-labs/avalanchego/utils/constants"
)

// ErrNoIngressConnections denotes that no node is connected to this validator.
var ErrNoIngressConnections = errors.New("no ingress connections")

type ingressConnectionCounter interface {
IngressConnCount() int
}

type validatorRetriever interface {
GetValidator(subnetID ids.ID, nodeID ids.NodeID) (*validators.Validator, bool)
}

type noIngressConnAlert struct {
selfID ids.NodeID
ingressConnections ingressConnectionCounter
validators validatorRetriever
}

func ingressConnResult(n int, areWeValidator bool) map[string]interface{} {
return map[string]interface{}{"ingressConnectionCount": n, "primary network validator": areWeValidator}
}

func (nica *noIngressConnAlert) checkHealth() (interface{}, error) {
connCount := nica.ingressConnections.IngressConnCount()
_, areWeValidator := nica.validators.GetValidator(constants.PrimaryNetworkID, nica.selfID)

if connCount > 0 {
return ingressConnResult(connCount, areWeValidator), nil
}

if !areWeValidator {
return ingressConnResult(connCount, areWeValidator), nil
}

return ingressConnResult(connCount, areWeValidator), ErrNoIngressConnections
}
71 changes: 71 additions & 0 deletions network/no_ingress_conn_alert_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved.
// See the file LICENSE for licensing terms.

package network

import (
"testing"

"github.com/stretchr/testify/require"

"github.com/ava-labs/avalanchego/ids"
"github.com/ava-labs/avalanchego/snow/validators"
)

type fakeValidatorRetriever struct {
result bool
}

func (m *fakeValidatorRetriever) GetValidator(ids.ID, ids.NodeID) (*validators.Validator, bool) {
return nil, m.result
}

type fakeIngressConnectionCounter struct {
res int
}

func (m *fakeIngressConnectionCounter) IngressConnCount() int {
return m.res
}

func TestNoIngressConnAlertHealthCheck(t *testing.T) {
for _, testCase := range []struct {
name string
getValidatorResult bool
ingressConnCountResult int
expectedErr error
expectedResult interface{}
}{
{
name: "not a validator of a primary network",
expectedResult: map[string]interface{}{"ingressConnectionCount": 0, "primary network validator": false},
},
{
name: "a validator of the primary network",
getValidatorResult: true,
expectedResult: map[string]interface{}{
"ingressConnectionCount": 0, "primary network validator": true,
},
expectedErr: ErrNoIngressConnections,
},
{
name: "a validator with ingress connections",
expectedResult: map[string]interface{}{"ingressConnectionCount": 42, "primary network validator": true},
expectedErr: nil,
ingressConnCountResult: 42,
getValidatorResult: true,
},
} {
t.Run(testCase.name, func(t *testing.T) {
nica := &noIngressConnAlert{
selfID: ids.EmptyNodeID,
validators: &fakeValidatorRetriever{result: testCase.getValidatorResult},
ingressConnections: &fakeIngressConnectionCounter{res: testCase.ingressConnCountResult},
}

result, err := nica.checkHealth()
require.Equal(t, testCase.expectedErr, err)
require.Equal(t, testCase.expectedResult, result)
})
}
}
3 changes: 1 addition & 2 deletions network/peer/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,5 @@ type Config struct {
IPSigner *IPSigner

// IngressConnectionCount counts the ingress (to us) connections.
// Needs to be a pointer because it's shared across all peer connections.
IngressConnectionCount *atomic.Uint64
IngressConnectionCount atomic.Uint64
}
Loading

0 comments on commit 0873221

Please sign in to comment.