Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update probe-interval and stale contact point timeout calculation #2601

Merged
merged 7 commits into from
Jul 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,7 @@ public void HaveExpectedDefaults()
settings.ContactPoint.FallbackPort.Should().BeNull();
settings.ContactPoint.FilterOnFallbackPort.Should().BeTrue();
settings.ContactPoint.ProbingFailureTimeout.Should().Be(TimeSpan.FromSeconds(3));
settings.ContactPoint.StaleContactPointTimeout.Should().Be(3.Seconds());
settings.ContactPoint.ProbeInterval.Should().Be(TimeSpan.FromSeconds(1));
settings.ContactPoint.ProbeInterval.Should().Be(TimeSpan.FromSeconds(5));
settings.ContactPoint.ProbeIntervalJitter.Should().Be(0.2);
settings.JoinDecider.ImplClass.Should()
.Be("Akka.Management.Cluster.Bootstrap.LowestAddressJoinDecider, Akka.Management");
Expand Down Expand Up @@ -78,9 +77,8 @@ public void SetupOverridesSettings()
{
FallbackPort = 1234,
FilterOnFallbackPort = false,
StaleContactPointTimeout = 1.Seconds(),
ProbeInterval = 2.Seconds(),
ProbingFailureTimeout = 3.Seconds(),
ProbingFailureTimeout = 4.Seconds(),
ProbeIntervalJitter = 1.0
},
JoinDecider = new JoinDeciderSetup
Expand All @@ -106,9 +104,8 @@ public void SetupOverridesSettings()

settings.ContactPoint.FallbackPort.Should().Be(1234);
settings.ContactPoint.FilterOnFallbackPort.Should().BeFalse();
settings.ContactPoint.StaleContactPointTimeout.Should().Be(1.Seconds());
settings.ContactPoint.ProbeInterval.Should().Be(2.Seconds());
settings.ContactPoint.ProbingFailureTimeout.Should().Be(3.Seconds());
settings.ContactPoint.ProbingFailureTimeout.Should().Be(4.Seconds());
settings.ContactPoint.ProbeIntervalJitter.Should().Be(1.0);

settings.JoinDecider.ImplClass.Should()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -233,12 +233,6 @@ public sealed class ContactPointOptions
/// </summary>
public double? ProbeIntervalJitter { get; set; }

/// <summary>
/// Sets the time after which the bootstrap coordinator considers previously discovered contact points
/// to be stale and in need of re-discovery by the HTTP probes.
/// </summary>
public TimeSpan? StaleContactPointTimeout { get; set; }

internal void Apply(StringBuilder sb)
{
sb.AppendLine("contact-point {");
Expand All @@ -253,8 +247,6 @@ internal void Apply(StringBuilder sb)
sb.AppendLine($"probe-interval = {ProbeInterval.ToHocon()}");
if (ProbeIntervalJitter is { })
sb.AppendLine($"probe-interval-jitter = {ProbeIntervalJitter.ToHocon()}");
if (StaleContactPointTimeout is not null)
sb.AppendLine($"stale-contact-point-timeout = {StaleContactPointTimeout.ToHocon()}");

sb.AppendLine("}");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -152,39 +152,27 @@ public static ContactPointSettings Create(Config config)
var fallback = contactPointConfig.GetString("fallback-port");
var fallbackPort = string.IsNullOrWhiteSpace(fallback) || fallback == "<fallback-port>"
? (int?) null : int.Parse(fallback);
var probeFailureTimeout = contactPointConfig.GetTimeSpan("probing-failure-timeout", null, false);
var staleEntryTimeoutStr = contactPointConfig.GetString("stale-contact-point-timeout");
var staleEntryTimeout = string.IsNullOrWhiteSpace(staleEntryTimeoutStr)
|| staleEntryTimeoutStr is "off"
|| staleEntryTimeoutStr is "false"
|| staleEntryTimeoutStr is "no"
? probeFailureTimeout
: contactPointConfig.GetTimeSpan("stale-contact-point-timeout");


return new ContactPointSettings(
fallbackPort: fallbackPort,
filterOnFallbackPort: contactPointConfig.GetBoolean("filter-on-fallback-port"),
probingFailureTimeout: probeFailureTimeout,
probeInterval: contactPointConfig.GetTimeSpan("probe-interval", null, false),
probeIntervalJitter: contactPointConfig.GetDouble("probe-interval-jitter"),
staleContactPointTimeout: staleEntryTimeout);
probingFailureTimeout: contactPointConfig.GetTimeSpan("probing-failure-timeout", TimeSpan.FromSeconds(3), false),
probeInterval: contactPointConfig.GetTimeSpan("probe-interval", TimeSpan.FromSeconds(5), false),
probeIntervalJitter: contactPointConfig.GetDouble("probe-interval-jitter"));
}

private ContactPointSettings(
int? fallbackPort,
bool filterOnFallbackPort,
TimeSpan probingFailureTimeout,
TimeSpan probeInterval,
double probeIntervalJitter,
TimeSpan staleContactPointTimeout)
double probeIntervalJitter)
{
FallbackPort = fallbackPort;
FilterOnFallbackPort = filterOnFallbackPort;
ProbingFailureTimeout = probingFailureTimeout;
ProbeInterval = probeInterval;
ProbeIntervalJitter = probeIntervalJitter;
StaleContactPointTimeout = staleContactPointTimeout;
}

public int? FallbackPort { get; }
Expand All @@ -193,22 +181,19 @@ private ContactPointSettings(
public TimeSpan ProbeInterval { get; }
public double ProbeIntervalJitter { get; }
public int MaxSeedNodesToExpose { get; } = 5;
public TimeSpan StaleContactPointTimeout { get; }

internal ContactPointSettings Copy(
int? fallbackPort,
bool? filterOnFallbackPort,
TimeSpan? probingFailureTimeout,
TimeSpan? probeInterval,
double? probeIntervalJitter,
TimeSpan? staleContactPointTimeout)
double? probeIntervalJitter)
=> new ContactPointSettings(
fallbackPort: fallbackPort ?? FallbackPort,
filterOnFallbackPort: filterOnFallbackPort ?? FilterOnFallbackPort,
probingFailureTimeout: probingFailureTimeout ?? ProbingFailureTimeout,
probeInterval: probeInterval ?? ProbeInterval,
probeIntervalJitter: probeIntervalJitter ?? ProbeIntervalJitter,
staleContactPointTimeout: staleContactPointTimeout ?? StaleContactPointTimeout);
probeIntervalJitter: probeIntervalJitter ?? ProbeIntervalJitter);
}

public sealed class JoinDeciderSettings
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -201,17 +201,14 @@ public sealed class ContactPointSetup
/// Max amount of jitter to be added on retries
/// </summary>
public double? ProbeIntervalJitter { get; set; }

public TimeSpan? StaleContactPointTimeout { get; set; }

internal ClusterBootstrapSettings.ContactPointSettings Apply(ClusterBootstrapSettings.ContactPointSettings settings)
=> settings.Copy(
fallbackPort: FallbackPort,
filterOnFallbackPort: FilterOnFallbackPort,
probingFailureTimeout: ProbingFailureTimeout,
probeInterval: ProbeInterval,
probeIntervalJitter: ProbeIntervalJitter,
staleContactPointTimeout: StaleContactPointTimeout);
probeIntervalJitter: ProbeIntervalJitter);
}

public sealed class JoinDeciderSetup
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,9 @@ public BootstrapCoordinator(ServiceDiscovery discovery, IJoinDecider joinDecider
_discovery = discovery;
_joinDecider = joinDecider;
_settings = settings;
_staleContactPointTimeout = _settings.ContactPoint.ProbeInterval + _settings.ContactPoint.StaleContactPointTimeout;

var cps = _settings.ContactPoint;
_staleContactPointTimeout = cps.ProbeInterval + cps.ProbingFailureTimeout;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM


_log = Context.GetLogger();
_cluster = Akka.Cluster.Cluster.Get(Context.System);
Expand Down
15 changes: 2 additions & 13 deletions src/management/Akka.Management/Resources/reference.conf
Original file line number Diff line number Diff line change
Expand Up @@ -175,23 +175,12 @@ akka.management {
# it will initiate rediscovery again instead of continuing to retry.
#
# Note that the effective final value being used to calculate probing timeout is
# probe-interval + probing-failure-timeout, or 4 seconds by default
# probe-interval + probing-failure-timeout, or 8 seconds by default
probing-failure-timeout = 3s

# Sets the time after which the bootstrap coordinator considers previously discovered contact points
# to be stale and in need of re-discovery by the HTTP probes.
#
# The legacy value for this is equal to the value of `probing-failure-timeout`. This is the value
# that will be used if `stale-contact-point-timeout` is set to `off`, `false`, `no`, or empty.
#
# Note that the effective final value being used to calculate stale contact point timeout is
# probe-interval + stale-contact-point-timeout if this setting is set,
# or probe-interval + probing-failure-timeout if this setting is not set.
stale-contact-point-timeout = off

# Interval at which contact points should be polled
# the effective interval used is this value plus the same value multiplied by the jitter value
probe-interval = 1s
probe-interval = 5s

# Max amount of jitter to be added on retries
probe-interval-jitter = 0.2
Expand Down