Skip to content

Commit

Permalink
Merge pull request #32438 from vespa-engine/andreer/group-independent-rename
Browse files: browse the repository at this point in the history

group independent rename
  • Loading branch information
mpolden authored Sep 20, 2024
2 parents 73d26b5 + 151be05 commit f264e0b
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 17 deletions.
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision.maintenance;

import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.NodeType;
import com.yahoo.jdisc.Metric;
import com.yahoo.vespa.flags.Flags;
Expand All @@ -10,6 +10,8 @@
import com.yahoo.vespa.hosted.provision.NodeList;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.node.Agent;
import com.yahoo.vespa.hosted.provision.node.Allocation;
import com.yahoo.vespa.hosted.provision.node.ClusterId;

import java.time.Duration;
import java.util.Collections;
Expand All @@ -33,51 +35,57 @@ public HostRenamer(NodeRepository nodeRepository, Duration interval, Metric metr
this.hostnameSchemeFlag = Flags.HOSTNAME_SCHEME.bindTo(nodeRepository.flagSource());
}

/**
 * Identifies one group within one cluster.
 *
 * Instances are collected into sets per host, and those sets are compared to decide
 * whether two hosts carry nodes of the same cluster group.
 *
 * @param clusterId  the cluster the group belongs to
 * @param groupIndex index of the group within the cluster; primitive because the construction
 *                   site always supplies a value (it falls back to 0 when no group is set)
 */
record ClusterGroup(ClusterId clusterId, int groupIndex) {}

/**
 * Runs one renaming pass over the active hosts.
 *
 * Returns 0.0 without touching anything when the node repository reports that it is not working,
 * otherwise 1.0 once the pass is done. A host is deprovisioned only when changeHostname accepts it
 * and nothing it carries is also carried by a host that is already retiring, or by a host
 * deprovisioned earlier in this same pass.
 *
 * NOTE(review): this span appears to interleave the pre-change (ApplicationId-based) and
 * post-change (ClusterGroup-based) lines of the commit diff without +/- markers. As written the
 * two consecutive if-openings are closed only once, so the braces do not balance - confirm which
 * variant is meant to be kept.
 */
@Override
protected double maintain() {
if (!nodeRepository().nodes().isWorking()) return 0.0;
NodeList allNodes = nodeRepository().nodes().list();
NodeList activeHosts = allNodes.nodeType(NodeType.host).state(Node.State.active);
// Seed the "busy" set with whatever is already on hosts that are retiring
Set<ApplicationId> retiringApplications = applicationsOnRetiringHosts(activeHosts, allNodes);
Set<ClusterGroup> retiringClusterGroups = applicationsOnRetiringHosts(activeHosts, allNodes);
for (var host : activeHosts) {
Set<ApplicationId> applicationsOnHost = applicationsOn(host, allNodes);
if (!changeHostname(host, applicationsOnHost)) continue;
Set<ClusterGroup> clusterGroupsOnHost = applicationsGroupsOn(host, allNodes);
if (!changeHostname(host, clusterGroupsOnHost)) continue;

// Only proceed when this host shares nothing with the hosts already being retired
if (Collections.disjoint(retiringApplications, applicationsOnHost)) {
if (Collections.disjoint(retiringClusterGroups, clusterGroupsOnHost)) {
LOG.info("Deprovisioning " + host + " to change its hostname");
nodeRepository().nodes().deprovision(host.hostname(), Agent.system, nodeRepository().clock().instant());
// Record what this host carries so overlapping hosts later in the loop are left alone
retiringApplications.addAll(applicationsOnHost);
retiringClusterGroups.addAll(clusterGroupsOnHost);
}
}
return 1.0;
}

/**
 * Collects, for every child node of the given host, a key describing what that child is allocated to.
 *
 * The ApplicationId variant records the allocation owner. The ClusterGroup variant records the
 * owner together with the cluster id and the group index, using index 0 when the cluster has no group.
 *
 * NOTE(review): two signatures, two set declarations and two return statements are present here;
 * this looks like the pre- and post-change lines of the commit diff rendered without +/- markers -
 * confirm which variant is meant to be kept.
 *
 * @param host     the host whose children are inspected
 * @param allNodes the node list the children are looked up in
 * @return the set of keys found on the host's children; empty when the host has no children
 */
private Set<ApplicationId> applicationsOn(Node host, NodeList allNodes) {
Set<ApplicationId> applications = new HashSet<>();
private Set<ClusterGroup> applicationsGroupsOn(Node host, NodeList allNodes) {
Set<ClusterGroup> clusterGroups = new HashSet<>();
for (var child : allNodes.childrenOf(host)) {
applications.add(child.allocation().get().owner());
// presumably allocation() is an Optional, so a child without allocation fails here - TODO confirm
Allocation allocation = child.allocation().orElseThrow();
clusterGroups.add(new ClusterGroup(
new ClusterId(allocation.owner(), allocation.membership().cluster().id()),
// A cluster without an explicit group is keyed as group 0
allocation.membership().cluster().group().map(ClusterSpec.Group::index).orElse(0)));
}
return applications;
return clusterGroups;
}

/**
 * Builds the union of the per-host key sets over the retiring subset of the given active hosts.
 *
 * NOTE(review): both an ApplicationId-typed and a ClusterGroup-typed signature and set declaration
 * are present; this looks like the pre- and post-change lines of the commit diff rendered without
 * +/- markers - confirm which variant is meant to be kept.
 *
 * @param activeHosts the active hosts; only those selected by retiring() are considered
 * @param allNodes    the node list used to find the children of each retiring host
 * @return a new mutable set; the caller in maintain() adds to it
 */
private Set<ApplicationId> applicationsOnRetiringHosts(NodeList activeHosts, NodeList allNodes) {
Set<ApplicationId> applications = new HashSet<>();
private Set<ClusterGroup> applicationsOnRetiringHosts(NodeList activeHosts, NodeList allNodes) {
Set<ClusterGroup> applications = new HashSet<>();
for (var host : activeHosts.retiring()) {
applications.addAll(applicationsOn(host, allNodes));
applications.addAll(applicationsGroupsOn(host, allNodes));
}
return applications;
}

private boolean changeHostname(Node node, Set<ApplicationId> instances) {
private boolean changeHostname(Node node, Set<ClusterGroup> clusterGroups) {
if (node.hostname().endsWith(".vespa-cloud.net")) {
return false;
}
Set<String> wantedSchemes;
if (instances.isEmpty()) {
if (clusterGroups.isEmpty()) {
wantedSchemes = Set.of(hostnameSchemeFlag.value());
} else {
wantedSchemes = instances.stream()
.map(instance -> hostnameSchemeFlag.withApplicationId(Optional.of(instance)).value())
wantedSchemes = clusterGroups.stream()
.map(clusterGroup -> hostnameSchemeFlag.withApplicationId(
Optional.of(clusterGroup.clusterId().application())).value())
.collect(Collectors.toSet());
}
return wantedSchemes.size() == 1 && wantedSchemes.iterator().next().equals("standard");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import java.util.function.Supplier;

import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;

/**
Expand Down Expand Up @@ -71,6 +72,39 @@ public void rename() {
assertEquals(0, list.get().retiring().size(), "No more hosts to rename");
}

/**
 * Deploys one grouped application on hosts with legacy hostnames and checks that a single
 * maintenance run, once the hostname scheme flag is "standard", retires two hosts whose
 * application nodes belong to different groups.
 */
@Test
public void renameGrouped() {
    InMemoryFlagSource flags = new InMemoryFlagSource();
    Zone zone = new Zone(Environment.prod, RegionName.from("us-east"));
    ProvisioningTester tester = new ProvisioningTester.Builder()
            .zone(zone)
            .flagSource(flags)
            .build();
    HostRenamer renamer = new HostRenamer(tester.nodeRepository(), Duration.ofDays(1), new MockMetric());
    // Always re-read the repository, leaving out nodes that are already deprovisioned
    Supplier<NodeList> currentNodes =
            () -> tester.nodeRepository().nodes().list().not().state(Node.State.deprovisioned);

    ApplicationId groupedApp = ProvisioningTester.applicationId("groupedApp");
    provisionHosts(4, tester, "legacy.example.com");
    deployGroupedApp(groupedApp, tester);

    // With the flag unset, a run must not retire anything
    renamer.maintain();
    assertEquals(0, currentNodes.get().retiring().size(), "No hosts to rename when feature flag is unset");

    // Switch to the standard scheme and run again
    flags.withStringFlag(Flags.HOSTNAME_SCHEME.id(), "standard");
    renamer.maintain();

    NodeList retiringAppNodes = currentNodes.get().owner(groupedApp).retiring();
    assertEquals(2, retiringAppNodes.size(), "One node per group is retired at a time");

    List<Node> retiring = retiringAppNodes.asList();
    var firstGroup = retiring.get(0).allocation().orElseThrow().membership().cluster().group();
    var secondGroup = retiring.get(1).allocation().orElseThrow().membership().cluster().group();
    assertNotEquals("Retiring nodes are from different groups", firstGroup, secondGroup);

    assertEquals(2, currentNodes.get().hosts().retiring().size(), "Two hosts should be retired");
}

private void replaceHosts(NodeList hosts, ProvisioningTester tester) {
for (var host : hosts) {
if (!host.status().wantToRetire()) throw new IllegalArgumentException(host + " is not requested to retire");
Expand Down Expand Up @@ -99,6 +133,12 @@ private void deploy(ApplicationId application, ProvisioningTester tester) {
tester.deploy(application, contentSpec, capacity);
}

/**
 * Deploys the given application as a single content cluster named "content1" on Vespa version "7".
 *
 * @param application the application to deploy
 * @param tester      the provisioning tester performing the deployment
 */
private void deployGroupedApp(ApplicationId application, ProvisioningTester tester) {
    ClusterSpec contentSpec = ClusterSpec.request(ClusterSpec.Type.content, ClusterSpec.Id.from("content1"))
            .vespaVersion("7")
            .build();
    // presumably 4 nodes spread over 2 groups - TODO confirm against the ClusterResources constructor
    ClusterResources clusterResources = new ClusterResources(4, 2, new NodeResources(2, 8, 50, 1));
    tester.deploy(application, contentSpec, Capacity.from(clusterResources));
}

private void provisionHosts(int count, ProvisioningTester tester, String domain) {
List<Node> nodes = tester.makeProvisionedNodes(count, (index) -> "host-" + index + "." + domain, new Flavor(new NodeResources(32, 128, 1024, 10)),
Optional.empty(), NodeType.host, 10, false);
Expand Down

0 comments on commit f264e0b

Please sign in to comment.