Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

prio nodes farmerbot #988

Merged
merged 11 commits into from
May 19, 2024
2 changes: 2 additions & 0 deletions farmerbot/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,8 @@ included_nodes: [optional, if no nodes are added then the farmerbot will include
- "<your node ID to be included, required at least 2>"
excluded_nodes:
- "<your node ID to be excluded, optional>"
priority_nodes:
- "<your node ID to have a priority in nodes management, optional>"
never_shutdown_nodes:
- "<your node ID to be never shutdown, optional>"
power:
Expand Down
48 changes: 40 additions & 8 deletions farmerbot/internal/farmerbot.go
Original file line number Diff line number Diff line change
Expand Up @@ -169,8 +169,8 @@ func (f *FarmerBot) serve(ctx context.Context) error {
return nil, fmt.Errorf("failed to load request payload: %w", err)
}

_, ok := f.nodes[nodeID]
if ok {
_, _, err := f.getNode(nodeID)
if err == nil {
return nil, fmt.Errorf("node %d already exists", nodeID)
}

Expand Down Expand Up @@ -259,12 +259,14 @@ func (f *FarmerBot) iterateOnNodes(ctx context.Context, subConn Substrate) error
}

// remove nodes that don't exist anymore in the farm
for nodeID := range f.state.nodes {
if !slices.Contains(farmNodes, nodeID) {
f.state.deleteNode(nodeID)
for _, node := range f.state.nodes {
if !slices.Contains(farmNodes, uint32(node.ID)) {
f.state.deleteNode(uint32(node.ID))
}
}

farmNodes = addPriorityToNodes(f.state.config.PriorityNodes, farmNodes)

for _, nodeID := range farmNodes {
if slices.Contains(f.state.config.ExcludedNodes, nodeID) {
continue
Expand All @@ -282,7 +284,10 @@ func (f *FarmerBot) iterateOnNodes(ctx context.Context, subConn Substrate) error
log.Error().Err(err).Send()
}

node := f.state.nodes[nodeID]
_, node, err := f.state.getNode(nodeID)
if err != nil {
log.Error().Err(err).Send()
}

if node.powerState == off && (node.neverShutDown || node.hasActiveRentContract) {
log.Debug().Uint32("nodeID", nodeID).Msg("Power on node because it is set to never shutdown or has a rent contract")
Expand Down Expand Up @@ -328,11 +333,38 @@ func (f *FarmerBot) iterateOnNodes(ctx context.Context, subConn Substrate) error
return nil
}

// addPriorityToNodes returns the farm node IDs reordered so that the
// configured priority nodes come first.
//
// The result starts with every ID from priorityNodes that is also present in
// farmNodes, in the order given by priorityNodes, followed by the remaining
// IDs of farmNodes in their original order. Priority IDs that are not part of
// the farm are dropped, and a priority ID that is listed more than once is
// kept only once so the caller does not process the same node twice.
//
// Neither input slice is modified; the returned slice is always non-nil.
func addPriorityToNodes(priorityNodes, farmNodes []uint32) []uint32 {
	// every element of the result comes from farmNodes, so its length is a
	// safe upper bound for the capacity
	orderedNodes := make([]uint32, 0, len(farmNodes))

	// priority nodes first: only the ones that exist in the farm, each once
	for _, nodeID := range priorityNodes {
		if !slices.Contains(farmNodes, nodeID) {
			continue
		}
		if slices.Contains(orderedNodes, nodeID) {
			// duplicated entry in the priority list
			continue
		}
		orderedNodes = append(orderedNodes, nodeID)
	}

	// only the prefix collected above holds priority nodes; later appends
	// must not influence the membership check below
	priorityCount := len(orderedNodes)

	// then the rest of the farm nodes, preserving their order
	for _, nodeID := range farmNodes {
		if !slices.Contains(orderedNodes[:priorityCount], nodeID) {
			orderedNodes = append(orderedNodes, nodeID)
		}
	}

	return orderedNodes
}

func (f *FarmerBot) addOrUpdateNode(ctx context.Context, subConn Substrate, nodeID uint32) error {
neverShutDown := slices.Contains(f.state.config.NeverShutDownNodes, nodeID)

oldNode, nodeExists := f.state.nodes[nodeID]
if nodeExists {
_, oldNode, err := f.state.getNode(nodeID)
if err == nil { // node exists
updateErr := oldNode.update(ctx, subConn, f.rmbNodeClient, neverShutDown, f.state.farm.DedicatedFarm, f.config.ContinueOnPoweringOnErr)

// update old node state even if it failed
Expand Down
17 changes: 9 additions & 8 deletions farmerbot/internal/farmerbot_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ func TestFarmerbot(t *testing.T) {
inputs := Config{
FarmID: 1,
IncludedNodes: []uint32{1, 2},
PriorityNodes: []uint32{2},
xmonader marked this conversation as resolved.
Show resolved Hide resolved
Power: power{WakeUpThresholdPercentages: ThresholdPercentages{
CRU: 50,
SRU: 50,
Expand All @@ -53,8 +54,8 @@ func TestFarmerbot(t *testing.T) {
assert.NoError(t, err)
farmerbot.state = state

oldNode1 := farmerbot.nodes[1]
oldNode2 := farmerbot.nodes[2]
oldNode1 := farmerbot.nodes[0]
oldNode2 := farmerbot.nodes[1]

t.Run("invalid identity", func(t *testing.T) {
_, err := NewFarmerBot(ctx, Config{}, "dev", "invalid", peer.KeyTypeSr25519)
Expand All @@ -71,10 +72,10 @@ func TestFarmerbot(t *testing.T) {
assert.True(t, errors.Is(err, substrate.ErrNotFound))
})

t.Run("test iterateOnNodes: update nodes and power off extra node (periodic wake up: already on)", func(t *testing.T) {
t.Run("test iterateOnNodes: update nodes and power off extra node (respect priority - periodic wake up: already on)", func(t *testing.T) {
mockRMBAndSubstrateCalls(ctx, sub, rmb, inputs, true, true, resources, []string{}, false, false)

sub.EXPECT().SetNodePowerTarget(farmerbot.identity, gomock.Any(), false).Return(types.Hash{}, nil)
sub.EXPECT().SetNodePowerTarget(farmerbot.identity, uint32(2), false).Return(types.Hash{}, nil)

err = farmerbot.iterateOnNodes(ctx, sub)
assert.NoError(t, err)
Expand All @@ -85,8 +86,8 @@ func TestFarmerbot(t *testing.T) {

oldNode1.powerState = off
oldNode2.powerState = off
state.addNode(oldNode1)
state.addNode(oldNode2)
assert.NoError(t, farmerbot.updateNode(oldNode1))
assert.NoError(t, farmerbot.updateNode(oldNode2))
farmerbot.state = state

mockRMBAndSubstrateCalls(ctx, sub, rmb, inputs, false, true, resources, []string{}, false, false)
Expand All @@ -100,8 +101,8 @@ func TestFarmerbot(t *testing.T) {
t.Run("test iterateOnNodes: update nodes (periodic wake up: failed to set off node)", func(t *testing.T) {
oldNode1.powerState = off
oldNode2.powerState = off
state.addNode(oldNode1)
state.addNode(oldNode2)
assert.NoError(t, farmerbot.updateNode(oldNode1))
assert.NoError(t, farmerbot.updateNode(oldNode2))
farmerbot.state = state

mockRMBAndSubstrateCalls(ctx, sub, rmb, inputs, false, true, resources, []string{}, false, false)
Expand Down
112 changes: 54 additions & 58 deletions farmerbot/internal/find_node_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,14 @@ import (
"github.com/threefoldtech/zos/pkg/gridtypes"
)

// reset is a test helper that writes the two saved node snapshots back through
// farmerbot.updateNode (asserting that neither call fails) and then assigns
// oldFarm to farmerbot.farm.
//
// NOTE(review): farmerbot is received by value; whether the farm assignment is
// visible to the caller depends on how FarmerBot holds its farm field - confirm.
func reset(t *testing.T, farmerbot FarmerBot, oldNode1, oldNode2 node, oldFarm substrate.Farm) {
	t.Helper()

	// restore the nodes in the same order as they were saved
	for _, snapshot := range []node{oldNode1, oldNode2} {
		assert.NoError(t, farmerbot.updateNode(snapshot))
	}

	farmerbot.farm = oldFarm
}

func TestFindNode(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()
Expand Down Expand Up @@ -45,15 +53,15 @@ func TestFindNode(t *testing.T) {
assert.NoError(t, err)
farmerbot.state = state

node := farmerbot.nodes[1]
node := farmerbot.nodes[0]
node.dedicated = false
farmerbot.nodes[1] = node
node2 := farmerbot.nodes[2]
farmerbot.nodes[0] = node
node2 := farmerbot.nodes[1]
node2.dedicated = false
farmerbot.nodes[2] = node2
farmerbot.nodes[1] = node2

oldNode1 := farmerbot.nodes[1]
oldNode2 := farmerbot.nodes[2]
oldNode1 := farmerbot.nodes[0]
oldNode2 := farmerbot.nodes[1]
oldFarm := farmerbot.farm

nodeOptions := NodeFilterOption{
Expand All @@ -67,93 +75,87 @@ func TestFindNode(t *testing.T) {
t.Run("test valid find node: found an ON node", func(t *testing.T) {
nodeID, err := farmerbot.findNode(sub, nodeOptions)
assert.NoError(t, err)
assert.Contains(t, farmerbot.nodes, nodeID)

farmerbot.addNode(oldNode1)
farmerbot.addNode(oldNode2)
farmerbot.farm = oldFarm
_, node, err := farmerbot.getNode(nodeID)
assert.NoError(t, err)
assert.Contains(t, farmerbot.nodes, node)

reset(t, farmerbot, oldNode1, oldNode2, oldFarm)
})

t.Run("test valid find node: found an ON node, trying to power off fails because resources is claimed", func(t *testing.T) {
nodeID, err := farmerbot.findNode(sub, nodeOptions)
assert.NoError(t, err)
assert.Contains(t, farmerbot.nodes, nodeID)

_, node, err := farmerbot.getNode(nodeID)
assert.NoError(t, err)
assert.Contains(t, farmerbot.nodes, node)

err = farmerbot.powerOff(sub, nodeID)
assert.Error(t, err)

farmerbot.addNode(oldNode1)
farmerbot.addNode(oldNode2)
farmerbot.farm = oldFarm
reset(t, farmerbot, oldNode1, oldNode2, oldFarm)
})

t.Run("test valid find node: found an ON node (first is OFF)", func(t *testing.T) {
node := farmerbot.nodes[1]
node := farmerbot.nodes[0]
node.powerState = off
farmerbot.nodes[1] = node
farmerbot.nodes[0] = node

nodeID, err := farmerbot.findNode(sub, nodeOptions)
assert.NoError(t, err)
assert.Equal(t, nodeID, uint32(farmerbot.nodes[2].ID))
assert.Equal(t, nodeID, uint32(farmerbot.nodes[1].ID))

farmerbot.addNode(oldNode1)
farmerbot.addNode(oldNode2)
farmerbot.farm = oldFarm
reset(t, farmerbot, oldNode1, oldNode2, oldFarm)
})

t.Run("test valid find node: node is rented (second node is found)", func(t *testing.T) {
node := farmerbot.nodes[1]
node := farmerbot.nodes[0]
node.hasActiveRentContract = true
farmerbot.nodes[1] = node
farmerbot.nodes[0] = node

nodeID, err := farmerbot.findNode(sub, nodeOptions)
assert.NoError(t, err)
assert.Contains(t, farmerbot.config.IncludedNodes, nodeID)

farmerbot.addNode(oldNode1)
farmerbot.addNode(oldNode2)
farmerbot.farm = oldFarm
reset(t, farmerbot, oldNode1, oldNode2, oldFarm)
})

t.Run("test valid find node: node is dedicated so node is found", func(t *testing.T) {
node := farmerbot.nodes[1]
node := farmerbot.nodes[0]
node.dedicated = true
farmerbot.nodes[1] = node
farmerbot.nodes[0] = node

nodeID, err := farmerbot.findNode(sub, nodeOptions)
assert.NoError(t, err)
assert.Contains(t, farmerbot.config.IncludedNodes, nodeID)

farmerbot.addNode(oldNode1)
farmerbot.addNode(oldNode2)
farmerbot.farm = oldFarm
reset(t, farmerbot, oldNode1, oldNode2, oldFarm)
})

t.Run("test valid find node: options and nodes are dedicated and nodes are unused", func(t *testing.T) {
nodeID, err := farmerbot.findNode(sub, nodeOptions)
assert.NoError(t, err)
assert.Contains(t, farmerbot.config.IncludedNodes, nodeID)

farmerbot.addNode(oldNode1)
farmerbot.addNode(oldNode2)
farmerbot.farm = oldFarm
reset(t, farmerbot, oldNode1, oldNode2, oldFarm)
})

t.Run("test valid find node: no gpus with specified device/vendor in first node (second is found)", func(t *testing.T) {
node2 := farmerbot.nodes[2]
node2 := farmerbot.nodes[1]
node2.gpus = []zos.GPU{
{
Device: "device",
Vendor: "vendor",
},
}
farmerbot.nodes[2] = node2
farmerbot.nodes[1] = node2

nodeID, err := farmerbot.findNode(sub, NodeFilterOption{GPUVendors: []string{"vendor"}, GPUDevices: []string{"device"}})
assert.NoError(t, err)
assert.Equal(t, nodeID, uint32(farmerbot.nodes[2].ID))
assert.Equal(t, nodeID, uint32(farmerbot.nodes[1].ID))

farmerbot.addNode(oldNode2)
reset(t, farmerbot, oldNode1, oldNode2, oldFarm)
})

t.Run("test invalid find node: no gpus in nodes", func(t *testing.T) {
Expand All @@ -162,20 +164,19 @@ func TestFindNode(t *testing.T) {
})

t.Run("test invalid find node: found an OFF node but change power failed", func(t *testing.T) {
node := farmerbot.nodes[1]
node := farmerbot.nodes[0]
node.powerState = off
node2 := farmerbot.nodes[2]
node2 := farmerbot.nodes[1]
node2.powerState = off
farmerbot.nodes[1] = node
farmerbot.nodes[2] = node2
farmerbot.nodes[0] = node
farmerbot.nodes[1] = node2

sub.EXPECT().SetNodePowerTarget(farmerbot.identity, gomock.Any(), true).Return(types.Hash{}, fmt.Errorf("error"))

_, err := farmerbot.findNode(sub, nodeOptions)
assert.Error(t, err)

farmerbot.addNode(oldNode1)
farmerbot.addNode(oldNode2)
reset(t, farmerbot, oldNode1, oldNode2, oldFarm)
})

t.Run("test invalid find node: no enough public ips", func(t *testing.T) {
Expand Down Expand Up @@ -203,38 +204,33 @@ func TestFindNode(t *testing.T) {
})

t.Run("test valid find node: nodes are dedicated and used, no nodes found", func(t *testing.T) {
node := farmerbot.nodes[1]
node := farmerbot.nodes[0]
node.dedicated = true
farmerbot.nodes[1] = node
node2 := farmerbot.nodes[2]
farmerbot.nodes[0] = node
node2 := farmerbot.nodes[1]
node2.dedicated = true
farmerbot.nodes[2] = node2
farmerbot.nodes[1] = node2

_, err := farmerbot.findNode(sub, NodeFilterOption{})
assert.Error(t, err)

farmerbot.addNode(oldNode1)
farmerbot.addNode(oldNode2)
farmerbot.farm = oldFarm
reset(t, farmerbot, oldNode1, oldNode2, oldFarm)
})

t.Run("test invalid find node: node is excluded", func(t *testing.T) {
_, err := farmerbot.findNode(sub, NodeFilterOption{NodesExcluded: []uint32{uint32(farmerbot.nodes[1].ID), uint32(farmerbot.nodes[2].ID)}})
_, err := farmerbot.findNode(sub, NodeFilterOption{NodesExcluded: []uint32{uint32(farmerbot.nodes[0].ID), uint32(farmerbot.nodes[1].ID)}})
assert.Error(t, err)
})

t.Run("test invalid find node: node cannot claim resources", func(t *testing.T) {
node := farmerbot.nodes[1]
node := farmerbot.nodes[0]
node.resources.total = capacity{}
node2 := farmerbot.nodes[2]
node2 := farmerbot.nodes[1]
node2.resources.total = capacity{}
farmerbot.nodes[1] = node
farmerbot.nodes[2] = node2
farmerbot.nodes[0] = node
farmerbot.nodes[1] = node2

_, err := farmerbot.findNode(sub, nodeOptions)
assert.Error(t, err)

farmerbot.addNode(oldNode1)
farmerbot.addNode(oldNode2)
})
}
Loading
Loading