From 7d745b55bdef0b37c1eac55ab04d5776e7883d9a Mon Sep 17 00:00:00 2001 From: Aaron Stannard Date: Sun, 18 Apr 2021 14:07:36 -0500 Subject: [PATCH 1/3] Configure duration for applying `MemberStatus.WeaklyUp` to joining nodes port of https://github.com/akka/akka/pull/29665 --- .../CoreAPISpec.ApproveCluster.approved.txt | 1 + .../MemberWeaklyUpSpec.cs | 2 +- .../MinMembersBeforeUpSpec.cs | 2 +- .../Akka.Cluster.Tests/ClusterConfigSpec.cs | 1 + src/core/Akka.Cluster/ClusterDaemon.cs | 2 +- src/core/Akka.Cluster/ClusterSettings.cs | 41 ++++++++++++++++--- .../Akka.Cluster/Configuration/Cluster.conf | 5 +-- 7 files changed, 43 insertions(+), 11 deletions(-) diff --git a/src/core/Akka.API.Tests/CoreAPISpec.ApproveCluster.approved.txt b/src/core/Akka.API.Tests/CoreAPISpec.ApproveCluster.approved.txt index 07cb439fdfe..c8c6f3b7c0e 100644 --- a/src/core/Akka.API.Tests/CoreAPISpec.ApproveCluster.approved.txt +++ b/src/core/Akka.API.Tests/CoreAPISpec.ApproveCluster.approved.txt @@ -220,6 +220,7 @@ namespace Akka.Cluster public string UseDispatcher { get; } public bool VerboseGossipReceivedLogging { get; } public bool VerboseHeartbeatLogging { get; } + public System.TimeSpan WeaklyUpAfter { get; } } [Akka.Annotations.InternalApiAttribute()] public interface IClusterActorRefProvider : Akka.Actor.IActorRefProvider, Akka.Remote.IRemoteActorRefProvider { } diff --git a/src/core/Akka.Cluster.Tests.MultiNode/MemberWeaklyUpSpec.cs b/src/core/Akka.Cluster.Tests.MultiNode/MemberWeaklyUpSpec.cs index ae7998d7afd..1d3a143a7a1 100644 --- a/src/core/Akka.Cluster.Tests.MultiNode/MemberWeaklyUpSpec.cs +++ b/src/core/Akka.Cluster.Tests.MultiNode/MemberWeaklyUpSpec.cs @@ -35,7 +35,7 @@ public MemberWeaklyUpConfig() CommonConfig = DebugConfig(on: false) .WithFallback(ConfigurationFactory.ParseString(@" akka.remote.retry-gate-closed-for = 3s - akka.cluster.allow-weakly-up-members = on")) + akka.cluster.allow-weakly-up-members = 3s")) .WithFallback(MultiNodeClusterSpec.ClusterConfig()); TestTransport = true; diff --git a/src/core/Akka.Cluster.Tests.MultiNode/MinMembersBeforeUpSpec.cs b/src/core/Akka.Cluster.Tests.MultiNode/MinMembersBeforeUpSpec.cs index 5f3e0d4642f..f7381a18cc8 100644 --- a/src/core/Akka.Cluster.Tests.MultiNode/MinMembersBeforeUpSpec.cs +++ b/src/core/Akka.Cluster.Tests.MultiNode/MinMembersBeforeUpSpec.cs @@ -103,7 +103,7 @@ public MinMembersBeforeUpWithWeaklyUpSpecConfig() CommonConfig = ConfigurationFactory.ParseString(@" akka.cluster.min-nr-of-members = 3 - akka.cluster.allow-weakly-up-members = on + akka.cluster.allow-weakly-up-members = 3s ").WithFallback(MultiNodeClusterSpec.ClusterConfigWithFailureDetectorPuppet()); } } diff --git a/src/core/Akka.Cluster.Tests/ClusterConfigSpec.cs b/src/core/Akka.Cluster.Tests/ClusterConfigSpec.cs index 7dedfbb61b7..6899f5fe93f 100644 --- a/src/core/Akka.Cluster.Tests/ClusterConfigSpec.cs +++ b/src/core/Akka.Cluster.Tests/ClusterConfigSpec.cs @@ -42,6 +42,7 @@ public void Clustering_must_be_able_to_parse_generic_cluster_config_elements() settings.LeaderActionsInterval.Should().Be(1.Seconds()); settings.UnreachableNodesReaperInterval.Should().Be(1.Seconds()); settings.AllowWeaklyUpMembers.Should().BeTrue(); + settings.WeaklyUpAfter.Should().Be(7.Seconds()); settings.PublishStatsInterval.Should().NotHaveValue(); settings.AutoDownUnreachableAfter.Should().NotHaveValue(); settings.DownRemovalMargin.Should().Be(TimeSpan.Zero); diff --git a/src/core/Akka.Cluster/ClusterDaemon.cs b/src/core/Akka.Cluster/ClusterDaemon.cs index 77013e8e360..536c362d9b8 100644 --- a/src/core/Akka.Cluster/ClusterDaemon.cs +++ b/src/core/Akka.Cluster/ClusterDaemon.cs @@ -2174,7 +2174,7 @@ public void LeaderActions() { _leaderActionCounter += 1; - if (_cluster.Settings.AllowWeaklyUpMembers && _leaderActionCounter >= 3) + if (_cluster.Settings.AllowWeaklyUpMembers && (_leaderActionCounter * _cluster.Settings.LeaderActionsInterval.TotalMilliseconds) >= _cluster.Settings.WeaklyUpAfter.TotalMilliseconds) MoveJoiningToWeaklyUp(); if (_leaderActionCounter == firstNotice || _leaderActionCounter % periodicNotice == 0) diff --git a/src/core/Akka.Cluster/ClusterSettings.cs b/src/core/Akka.Cluster/ClusterSettings.cs index 06d16e348fb..b6c54ee2cdc 100644 --- a/src/core/Akka.Cluster/ClusterSettings.cs +++ b/src/core/Akka.Cluster/ClusterSettings.cs @@ -95,7 +95,29 @@ public ClusterSettings(Config config, string systemName) DowningProviderType = typeof(NoDowning); RunCoordinatedShutdownWhenDown = clusterConfig.GetBoolean("run-coordinated-shutdown-when-down", false); - AllowWeaklyUpMembers = clusterConfig.GetBoolean("allow-weakly-up-members", false); + + // TODO: replace with a switch expression when we upgrade to C#8 or later + TimeSpan GetWeaklyUpDuration() + { + var cKey = "allow-weakly-up-members"; + switch (clusterConfig.GetString(cKey, string.Empty) + .ToLowerInvariant()) + { + case "off": + return TimeSpan.Zero; + case "on": + + return TimeSpan.FromSeconds(7); // for backwards compatibility when it wasn't a duration + default: + var val = clusterConfig.GetTimeSpan(cKey, TimeSpan.FromSeconds(7)); + if (val < TimeSpan.FromSeconds(7)) + throw new ConfigurationException($"Valid settings for [akka.cluster.{cKey}] are 'off', 'on', or a timespan greater than 0s. Received [{val}]"); + return val; + } + } + + WeaklyUpAfter = GetWeaklyUpDuration(); + } /// @@ -263,12 +285,21 @@ public ClusterSettings(Config config, string systemName) /// /// If this is set to "off", the leader will not move members to during a network /// split. This feature allows the leader to accept members to be - /// so they become part of the cluster even during a network split. The leader will - /// move members to after 3 rounds of 'leader-actions-interval' - /// without convergence. + /// so they become part of the cluster even during a network split. + /// + /// The leader will move members to status once convergence has been reached. + /// + public bool AllowWeaklyUpMembers => WeaklyUpAfter != TimeSpan.Zero; + + /// + /// The duration after which a member who is currently will be marked as + /// in the event that members of the cluster are currently unreachable. + /// + /// This is designed to allow new cluster members to perform work even in the event of a cluster split. + /// /// The leader will move members to status once convergence has been reached. /// - public bool AllowWeaklyUpMembers { get; } + public TimeSpan WeaklyUpAfter { get; } } } diff --git a/src/core/Akka.Cluster/Configuration/Cluster.conf b/src/core/Akka.Cluster/Configuration/Cluster.conf index d496c4f5064..96b9fa3ddd3 100644 --- a/src/core/Akka.Cluster/Configuration/Cluster.conf +++ b/src/core/Akka.Cluster/Configuration/Cluster.conf @@ -66,10 +66,9 @@ akka { # If this is set to "off", the leader will not move 'Joining' members to 'Up' during a network # split. This feature allows the leader to accept 'Joining' members to be 'WeaklyUp' # so they become part of the cluster even during a network split. The leader will - # move `Joining` members to 'WeaklyUp' after 3 rounds of 'leader-actions-interval' - # without convergence. + # move `Joining` members to 'WeaklyUp' after this configured duration without convergence. # The leader will move 'WeaklyUp' members to 'Up' status once convergence has been reached. - allow-weakly-up-members = on + allow-weakly-up-members = 7s # The roles of this member. List of strings, e.g. roles = ["A", "B"]. # The roles are part of the membership information and can be used by From e8c1be796956bd7358b34578de39682b5e44d139 Mon Sep 17 00:00:00 2001 From: Aaron Stannard Date: Sun, 18 Apr 2021 16:44:44 -0500 Subject: [PATCH 2/3] fixed validation check for TimeSpan duration passed in via HOCON --- src/core/Akka.Cluster/ClusterSettings.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/Akka.Cluster/ClusterSettings.cs b/src/core/Akka.Cluster/ClusterSettings.cs index b6c54ee2cdc..2894cd50c5d 100644 --- a/src/core/Akka.Cluster/ClusterSettings.cs +++ b/src/core/Akka.Cluster/ClusterSettings.cs @@ -110,7 +110,7 @@ TimeSpan GetWeaklyUpDuration() return TimeSpan.FromSeconds(7); // for backwards compatibility when it wasn't a duration default: var val = clusterConfig.GetTimeSpan(cKey, TimeSpan.FromSeconds(7)); - if (val < TimeSpan.FromSeconds(7)) + if(!(val > TimeSpan.Zero)) throw new ConfigurationException($"Valid settings for [akka.cluster.{cKey}] are 'off', 'on', or a timespan greater than 0s. Received [{val}]"); return val; } From 2c55905bbf54070822bbf662ec0014432622b9c1 Mon Sep 17 00:00:00 2001 From: Aaron Stannard Date: Sun, 18 Apr 2021 19:38:46 -0500 Subject: [PATCH 3/3] harden ClusterLogSpecs --- src/core/Akka.Cluster.Tests/ClusterLogSpec.cs | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/core/Akka.Cluster.Tests/ClusterLogSpec.cs b/src/core/Akka.Cluster.Tests/ClusterLogSpec.cs index 1653a45f5b8..b482b13959c 100644 --- a/src/core/Akka.Cluster.Tests/ClusterLogSpec.cs +++ b/src/core/Akka.Cluster.Tests/ClusterLogSpec.cs @@ -47,10 +47,13 @@ protected ClusterLogSpec(ITestOutputHelper output, Config config = null) protected void AwaitUp() { - AwaitCondition(() => ClusterView.IsSingletonCluster); - ClusterView.Self.Address.ShouldBe(_selfAddress); - ClusterView.Members.Select(m => m.Address).ShouldBe(new Address[] { _selfAddress }); - AwaitAssert(() => ClusterView.Status.ShouldBe(MemberStatus.Up)); + Within(TimeSpan.FromSeconds(10), () => + { + AwaitCondition(() => ClusterView.IsSingletonCluster); + ClusterView.Self.Address.ShouldBe(_selfAddress); + ClusterView.Members.Select(m => m.Address).ShouldBe(new Address[] { _selfAddress }); + AwaitAssert(() => ClusterView.Status.ShouldBe(MemberStatus.Up)); + }); } /// /// The expected log info pattern to intercept after a . @@ -71,9 +74,12 @@ protected void Join(string expected) /// protected void Down(string expected) { - EventFilter + Within(TimeSpan.FromSeconds(10), () => + { + EventFilter .Info(contains: expected) .ExpectOne(() => _cluster.Down(_selfAddress)); + }); } }