Skip to content

Commit

Permalink
Merge pull request #988 from threefoldtech/development_prio_nodes_far…
Browse files Browse the repository at this point in the history
…merbot

 prio nodes farmerbot
  • Loading branch information
xmonader authored May 19, 2024
2 parents c060542 + 779f74b commit 65791e6
Show file tree
Hide file tree
Showing 11 changed files with 315 additions and 255 deletions.
2 changes: 2 additions & 0 deletions farmerbot/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,8 @@ included_nodes: [optional, if no nodes are added then the farmerbot will include
- "<your node ID to be included, required at least 2>"
excluded_nodes:
- "<your node ID to be excluded, optional>"
priority_nodes:
- "<your node ID to have a priority in nodes management, optional>"
never_shutdown_nodes:
- "<your node ID to be never shutdown, optional>"
power:
Expand Down
1 change: 0 additions & 1 deletion farmerbot/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ require (
github.com/threefoldtech/tfgrid-sdk-go/rmb-sdk-go v0.15.0
github.com/threefoldtech/zos v0.5.6-0.20240226114056-364e04acbed3
github.com/vedhavyas/go-subkey v1.0.3
golang.org/x/exp v0.0.0-20231110203233-9a3e6036ecaa
gopkg.in/yaml.v3 v3.0.1
)

Expand Down
42 changes: 34 additions & 8 deletions farmerbot/internal/farmerbot.go
Original file line number Diff line number Diff line change
Expand Up @@ -169,8 +169,8 @@ func (f *FarmerBot) serve(ctx context.Context) error {
return nil, fmt.Errorf("failed to load request payload: %w", err)
}

_, ok := f.nodes[nodeID]
if ok {
_, _, err := f.getNode(nodeID)
if err == nil {
return nil, fmt.Errorf("node %d already exists", nodeID)
}

Expand Down Expand Up @@ -259,12 +259,14 @@ func (f *FarmerBot) iterateOnNodes(ctx context.Context, subConn Substrate) error
}

// remove nodes that don't exist anymore in the farm
for nodeID := range f.state.nodes {
if !slices.Contains(farmNodes, nodeID) {
f.state.deleteNode(nodeID)
for _, node := range f.state.nodes {
if !slices.Contains(farmNodes, uint32(node.ID)) {
f.state.deleteNode(uint32(node.ID))
}
}

farmNodes = addPriorityToNodes(f.state.config.PriorityNodes, farmNodes)

for _, nodeID := range farmNodes {
if slices.Contains(f.state.config.ExcludedNodes, nodeID) {
continue
Expand All @@ -282,7 +284,10 @@ func (f *FarmerBot) iterateOnNodes(ctx context.Context, subConn Substrate) error
log.Error().Err(err).Send()
}

node := f.state.nodes[nodeID]
_, node, err := f.state.getNode(nodeID)
if err != nil {
log.Error().Err(err).Send()
}

if node.powerState == off && (node.neverShutDown || node.hasActiveRentContract) {
log.Debug().Uint32("nodeID", nodeID).Msg("Power on node because it is set to never shutdown or has a rent contract")
Expand Down Expand Up @@ -328,11 +333,32 @@ func (f *FarmerBot) iterateOnNodes(ctx context.Context, subConn Substrate) error
return nil
}

// addPriorityToNodes returns the farm's node IDs reordered so that priority
// nodes come first.
//
// The result contains, in order:
//  1. every ID from priorityNodes that is also present in farmNodes, in the
//     order given by priorityNodes;
//  2. the remaining IDs from farmNodes, in their original order.
//
// Each ID appears at most once. Priority IDs that are not part of the farm are
// ignored. The input slices are not modified.
func addPriorityToNodes(priorityNodes, farmNodes []uint32) []uint32 {
	// Start with length 0 and only reserve capacity: allocating with a non-zero
	// length would pre-fill the result with zero IDs, and the appends below
	// would land after them.
	updatedFarmNodes := make([]uint32, 0, len(farmNodes))

	// inFarm answers "does this node belong to the farm?" without rescanning
	// farmNodes for every priority node.
	inFarm := make(map[uint32]struct{}, len(farmNodes))
	for _, nodeID := range farmNodes {
		inFarm[nodeID] = struct{}{}
	}

	// added tracks what is already in the result so no ID is emitted twice.
	added := make(map[uint32]struct{}, len(farmNodes))
	appendOnce := func(nodeID uint32) {
		if _, dup := added[nodeID]; dup {
			return
		}
		added[nodeID] = struct{}{}
		updatedFarmNodes = append(updatedFarmNodes, nodeID)
	}

	// add valid priority nodes (exist in farm) without duplicates
	for _, nodeID := range priorityNodes {
		if _, ok := inFarm[nodeID]; ok {
			appendOnce(nodeID)
		}
	}

	// add the rest of farm nodes
	for _, nodeID := range farmNodes {
		appendOnce(nodeID)
	}

	return updatedFarmNodes
}

func (f *FarmerBot) addOrUpdateNode(ctx context.Context, subConn Substrate, nodeID uint32) error {
neverShutDown := slices.Contains(f.state.config.NeverShutDownNodes, nodeID)

oldNode, nodeExists := f.state.nodes[nodeID]
if nodeExists {
_, oldNode, err := f.state.getNode(nodeID)
if err == nil { // node exists
updateErr := oldNode.update(ctx, subConn, f.rmbNodeClient, neverShutDown, f.state.farm.DedicatedFarm, f.config.ContinueOnPoweringOnErr)

// update old node state even if it failed
Expand Down
17 changes: 9 additions & 8 deletions farmerbot/internal/farmerbot_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ func TestFarmerbot(t *testing.T) {
inputs := Config{
FarmID: 1,
IncludedNodes: []uint32{1, 2},
PriorityNodes: []uint32{2},
Power: power{WakeUpThresholdPercentages: ThresholdPercentages{
CRU: 50,
SRU: 50,
Expand All @@ -53,8 +54,8 @@ func TestFarmerbot(t *testing.T) {
assert.NoError(t, err)
farmerbot.state = state

oldNode1 := farmerbot.nodes[1]
oldNode2 := farmerbot.nodes[2]
oldNode1 := farmerbot.nodes[0]
oldNode2 := farmerbot.nodes[1]

t.Run("invalid identity", func(t *testing.T) {
_, err := NewFarmerBot(ctx, Config{}, "dev", "invalid", peer.KeyTypeSr25519)
Expand All @@ -71,10 +72,10 @@ func TestFarmerbot(t *testing.T) {
assert.True(t, errors.Is(err, substrate.ErrNotFound))
})

t.Run("test iterateOnNodes: update nodes and power off extra node (periodic wake up: already on)", func(t *testing.T) {
t.Run("test iterateOnNodes: update nodes and power off extra node (respect priority - periodic wake up: already on)", func(t *testing.T) {
mockRMBAndSubstrateCalls(ctx, sub, rmb, inputs, true, true, resources, []string{}, false, false)

sub.EXPECT().SetNodePowerTarget(farmerbot.identity, gomock.Any(), false).Return(types.Hash{}, nil)
sub.EXPECT().SetNodePowerTarget(farmerbot.identity, uint32(2), false).Return(types.Hash{}, nil)

err = farmerbot.iterateOnNodes(ctx, sub)
assert.NoError(t, err)
Expand All @@ -85,8 +86,8 @@ func TestFarmerbot(t *testing.T) {

oldNode1.powerState = off
oldNode2.powerState = off
state.addNode(oldNode1)
state.addNode(oldNode2)
assert.NoError(t, farmerbot.updateNode(oldNode1))
assert.NoError(t, farmerbot.updateNode(oldNode2))
farmerbot.state = state

mockRMBAndSubstrateCalls(ctx, sub, rmb, inputs, false, true, resources, []string{}, false, false)
Expand All @@ -100,8 +101,8 @@ func TestFarmerbot(t *testing.T) {
t.Run("test iterateOnNodes: update nodes (periodic wake up: failed to set off node)", func(t *testing.T) {
oldNode1.powerState = off
oldNode2.powerState = off
state.addNode(oldNode1)
state.addNode(oldNode2)
assert.NoError(t, farmerbot.updateNode(oldNode1))
assert.NoError(t, farmerbot.updateNode(oldNode2))
farmerbot.state = state

mockRMBAndSubstrateCalls(ctx, sub, rmb, inputs, false, true, resources, []string{}, false, false)
Expand Down
112 changes: 54 additions & 58 deletions farmerbot/internal/find_node_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,14 @@ import (
"github.com/threefoldtech/zos/pkg/gridtypes"
)

// reset is a test helper that writes the two saved node snapshots back through
// farmerbot.updateNode (asserting that each call succeeds) and then assigns the
// saved farm back to farmerbot.farm, so a subtest can undo its changes.
//
// NOTE(review): farmerbot is received by value, so the farm assignment only
// reaches the caller's bot if the farm field lives behind a pointer (e.g. an
// embedded *state) — confirm against the FarmerBot definition.
func reset(t *testing.T, farmerbot FarmerBot, oldNode1, oldNode2 node, oldFarm substrate.Farm) {
	t.Helper()

	// Restore the nodes in the same order the snapshots were taken.
	for _, saved := range []node{oldNode1, oldNode2} {
		assert.NoError(t, farmerbot.updateNode(saved))
	}

	farmerbot.farm = oldFarm
}

func TestFindNode(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()
Expand Down Expand Up @@ -45,15 +53,15 @@ func TestFindNode(t *testing.T) {
assert.NoError(t, err)
farmerbot.state = state

node := farmerbot.nodes[1]
node := farmerbot.nodes[0]
node.dedicated = false
farmerbot.nodes[1] = node
node2 := farmerbot.nodes[2]
farmerbot.nodes[0] = node
node2 := farmerbot.nodes[1]
node2.dedicated = false
farmerbot.nodes[2] = node2
farmerbot.nodes[1] = node2

oldNode1 := farmerbot.nodes[1]
oldNode2 := farmerbot.nodes[2]
oldNode1 := farmerbot.nodes[0]
oldNode2 := farmerbot.nodes[1]
oldFarm := farmerbot.farm

nodeOptions := NodeFilterOption{
Expand All @@ -67,93 +75,87 @@ func TestFindNode(t *testing.T) {
t.Run("test valid find node: found an ON node", func(t *testing.T) {
nodeID, err := farmerbot.findNode(sub, nodeOptions)
assert.NoError(t, err)
assert.Contains(t, farmerbot.nodes, nodeID)

farmerbot.addNode(oldNode1)
farmerbot.addNode(oldNode2)
farmerbot.farm = oldFarm
_, node, err := farmerbot.getNode(nodeID)
assert.NoError(t, err)
assert.Contains(t, farmerbot.nodes, node)

reset(t, farmerbot, oldNode1, oldNode2, oldFarm)
})

t.Run("test valid find node: found an ON node, trying to power off fails because resources is claimed", func(t *testing.T) {
nodeID, err := farmerbot.findNode(sub, nodeOptions)
assert.NoError(t, err)
assert.Contains(t, farmerbot.nodes, nodeID)

_, node, err := farmerbot.getNode(nodeID)
assert.NoError(t, err)
assert.Contains(t, farmerbot.nodes, node)

err = farmerbot.powerOff(sub, nodeID)
assert.Error(t, err)

farmerbot.addNode(oldNode1)
farmerbot.addNode(oldNode2)
farmerbot.farm = oldFarm
reset(t, farmerbot, oldNode1, oldNode2, oldFarm)
})

t.Run("test valid find node: found an ON node (first is OFF)", func(t *testing.T) {
node := farmerbot.nodes[1]
node := farmerbot.nodes[0]
node.powerState = off
farmerbot.nodes[1] = node
farmerbot.nodes[0] = node

nodeID, err := farmerbot.findNode(sub, nodeOptions)
assert.NoError(t, err)
assert.Equal(t, nodeID, uint32(farmerbot.nodes[2].ID))
assert.Equal(t, nodeID, uint32(farmerbot.nodes[1].ID))

farmerbot.addNode(oldNode1)
farmerbot.addNode(oldNode2)
farmerbot.farm = oldFarm
reset(t, farmerbot, oldNode1, oldNode2, oldFarm)
})

t.Run("test valid find node: node is rented (second node is found)", func(t *testing.T) {
node := farmerbot.nodes[1]
node := farmerbot.nodes[0]
node.hasActiveRentContract = true
farmerbot.nodes[1] = node
farmerbot.nodes[0] = node

nodeID, err := farmerbot.findNode(sub, nodeOptions)
assert.NoError(t, err)
assert.Contains(t, farmerbot.config.IncludedNodes, nodeID)

farmerbot.addNode(oldNode1)
farmerbot.addNode(oldNode2)
farmerbot.farm = oldFarm
reset(t, farmerbot, oldNode1, oldNode2, oldFarm)
})

t.Run("test valid find node: node is dedicated so node is found", func(t *testing.T) {
node := farmerbot.nodes[1]
node := farmerbot.nodes[0]
node.dedicated = true
farmerbot.nodes[1] = node
farmerbot.nodes[0] = node

nodeID, err := farmerbot.findNode(sub, nodeOptions)
assert.NoError(t, err)
assert.Contains(t, farmerbot.config.IncludedNodes, nodeID)

farmerbot.addNode(oldNode1)
farmerbot.addNode(oldNode2)
farmerbot.farm = oldFarm
reset(t, farmerbot, oldNode1, oldNode2, oldFarm)
})

t.Run("test valid find node: options and nodes are dedicated and nodes are unused", func(t *testing.T) {
nodeID, err := farmerbot.findNode(sub, nodeOptions)
assert.NoError(t, err)
assert.Contains(t, farmerbot.config.IncludedNodes, nodeID)

farmerbot.addNode(oldNode1)
farmerbot.addNode(oldNode2)
farmerbot.farm = oldFarm
reset(t, farmerbot, oldNode1, oldNode2, oldFarm)
})

t.Run("test valid find node: no gpus with specified device/vendor in first node (second is found)", func(t *testing.T) {
node2 := farmerbot.nodes[2]
node2 := farmerbot.nodes[1]
node2.gpus = []zos.GPU{
{
Device: "device",
Vendor: "vendor",
},
}
farmerbot.nodes[2] = node2
farmerbot.nodes[1] = node2

nodeID, err := farmerbot.findNode(sub, NodeFilterOption{GPUVendors: []string{"vendor"}, GPUDevices: []string{"device"}})
assert.NoError(t, err)
assert.Equal(t, nodeID, uint32(farmerbot.nodes[2].ID))
assert.Equal(t, nodeID, uint32(farmerbot.nodes[1].ID))

farmerbot.addNode(oldNode2)
reset(t, farmerbot, oldNode1, oldNode2, oldFarm)
})

t.Run("test invalid find node: no gpus in nodes", func(t *testing.T) {
Expand All @@ -162,20 +164,19 @@ func TestFindNode(t *testing.T) {
})

t.Run("test invalid find node: found an OFF node but change power failed", func(t *testing.T) {
node := farmerbot.nodes[1]
node := farmerbot.nodes[0]
node.powerState = off
node2 := farmerbot.nodes[2]
node2 := farmerbot.nodes[1]
node2.powerState = off
farmerbot.nodes[1] = node
farmerbot.nodes[2] = node2
farmerbot.nodes[0] = node
farmerbot.nodes[1] = node2

sub.EXPECT().SetNodePowerTarget(farmerbot.identity, gomock.Any(), true).Return(types.Hash{}, fmt.Errorf("error"))

_, err := farmerbot.findNode(sub, nodeOptions)
assert.Error(t, err)

farmerbot.addNode(oldNode1)
farmerbot.addNode(oldNode2)
reset(t, farmerbot, oldNode1, oldNode2, oldFarm)
})

t.Run("test invalid find node: no enough public ips", func(t *testing.T) {
Expand Down Expand Up @@ -203,38 +204,33 @@ func TestFindNode(t *testing.T) {
})

t.Run("test valid find node: nodes are dedicated and used, no nodes found", func(t *testing.T) {
node := farmerbot.nodes[1]
node := farmerbot.nodes[0]
node.dedicated = true
farmerbot.nodes[1] = node
node2 := farmerbot.nodes[2]
farmerbot.nodes[0] = node
node2 := farmerbot.nodes[1]
node2.dedicated = true
farmerbot.nodes[2] = node2
farmerbot.nodes[1] = node2

_, err := farmerbot.findNode(sub, NodeFilterOption{})
assert.Error(t, err)

farmerbot.addNode(oldNode1)
farmerbot.addNode(oldNode2)
farmerbot.farm = oldFarm
reset(t, farmerbot, oldNode1, oldNode2, oldFarm)
})

t.Run("test invalid find node: node is excluded", func(t *testing.T) {
_, err := farmerbot.findNode(sub, NodeFilterOption{NodesExcluded: []uint32{uint32(farmerbot.nodes[1].ID), uint32(farmerbot.nodes[2].ID)}})
_, err := farmerbot.findNode(sub, NodeFilterOption{NodesExcluded: []uint32{uint32(farmerbot.nodes[0].ID), uint32(farmerbot.nodes[1].ID)}})
assert.Error(t, err)
})

t.Run("test invalid find node: node cannot claim resources", func(t *testing.T) {
node := farmerbot.nodes[1]
node := farmerbot.nodes[0]
node.resources.total = capacity{}
node2 := farmerbot.nodes[2]
node2 := farmerbot.nodes[1]
node2.resources.total = capacity{}
farmerbot.nodes[1] = node
farmerbot.nodes[2] = node2
farmerbot.nodes[0] = node
farmerbot.nodes[1] = node2

_, err := farmerbot.findNode(sub, nodeOptions)
assert.Error(t, err)

farmerbot.addNode(oldNode1)
farmerbot.addNode(oldNode2)
})
}
Loading

0 comments on commit 65791e6

Please sign in to comment.