Skip to content

Commit

Permalink
[YUNIKORN-1920] headroom with parent queue usage (#620)
Browse files Browse the repository at this point in the history
Headroom needs to take into account the usage in each queue and not
assume that the full max resource is available if the queue is not the
leaf queue.
Add a queue traker object for any queue that is not found while
traversing the hierarchy. The user/group has an application running in
that queue and will add usage at some point.

Add tests to cover usage and max with different resource types set.

Closes: #620

Signed-off-by: Manikandan R <manirajv06@gmail.com>
  • Loading branch information
wilfred-s authored and manirajv06 committed Aug 29, 2023
1 parent 2f343cc commit f83134b
Show file tree
Hide file tree
Showing 4 changed files with 81 additions and 73 deletions.
4 changes: 3 additions & 1 deletion pkg/scheduler/ugm/group_tracker.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,11 @@
package ugm

import (
"strings"
"sync"

"github.com/apache/yunikorn-core/pkg/common"
"github.com/apache/yunikorn-core/pkg/common/configs"
"github.com/apache/yunikorn-core/pkg/common/resources"
"github.com/apache/yunikorn-core/pkg/webservice/dao"
)
Expand Down Expand Up @@ -81,7 +83,7 @@ func (gt *GroupTracker) setLimits(queuePath string, resource *resources.Resource
func (gt *GroupTracker) headroom(queuePath string) *resources.Resource {
gt.Lock()
defer gt.Unlock()
return gt.queueTracker.headroom(queuePath)
return gt.queueTracker.headroom(strings.Split(queuePath, configs.DOT))
}

func (gt *GroupTracker) GetGroupResourceUsageDAOInfo() *dao.GroupResourceUsageDAOInfo {
Expand Down
36 changes: 17 additions & 19 deletions pkg/scheduler/ugm/queue_tracker.go
Original file line number Diff line number Diff line change
Expand Up @@ -245,34 +245,32 @@ func (qt *QueueTracker) setLimit(queuePath string, maxResource *resources.Resour
childQueueTracker.maxResources = maxResource
}

func (qt *QueueTracker) headroom(queuePath string) *resources.Resource {
func (qt *QueueTracker) headroom(hierarchy []string) *resources.Resource {
log.Log(log.SchedUGM).Debug("Calculating headroom",
zap.String("queue path", queuePath))
childQueuePath, immediateChildQueueName := getChildQueuePath(queuePath)
if childQueuePath != common.Empty {
if qt.childQueueTrackers[immediateChildQueueName] != nil {
headroom := qt.childQueueTrackers[immediateChildQueueName].headroom(childQueuePath)
if headroom != nil {
return resources.ComponentWiseMinPermissive(headroom, qt.maxResources)
}
} else {
log.Log(log.SchedUGM).Error("Child queueTracker tracker must be available in child queues map",
zap.String("child queueTracker name", immediateChildQueueName))
return nil
zap.Strings("queue path", hierarchy))
// depth first: all the way to the leaf, create if not exists
// more than 1 in the slice means we need to recurse down
var headroom, childHeadroom *resources.Resource
if len(hierarchy) > 1 {
childName := hierarchy[1]
if qt.childQueueTrackers[childName] == nil {
qt.childQueueTrackers[childName] = newQueueTracker(qt.queuePath, childName)
}
childHeadroom = qt.childQueueTrackers[childName].headroom(hierarchy[1:])
}

// arrived at the leaf or on the way out: check against current max if set
if !resources.Equals(resources.NewResource(), qt.maxResources) {
headroom := qt.maxResources.Clone()
headroom = qt.maxResources.Clone()
headroom.SubOnlyExisting(qt.resourceUsage)
log.Log(log.SchedUGM).Debug("Calculated headroom",
zap.String("queue path", queuePath),
zap.String("queue", qt.queueName),
zap.String("queue path", qt.queuePath),
zap.Stringer("max resource", qt.maxResources),
zap.Stringer("headroom", headroom))
return headroom
}
return nil
if headroom == nil {
return childHeadroom
}
return resources.ComponentWiseMinPermissive(headroom, childHeadroom)
}

func (qt *QueueTracker) getResourceUsageDAOInfo(parentQueuePath string) *dao.ResourceUsageDAOInfo {
Expand Down
110 changes: 58 additions & 52 deletions pkg/scheduler/ugm/queue_tracker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,12 @@
package ugm

import (
"strings"
"testing"

"gotest.tools/v3/assert"

"github.com/apache/yunikorn-core/pkg/common/configs"
"github.com/apache/yunikorn-core/pkg/common/resources"
)

Expand Down Expand Up @@ -251,62 +253,66 @@ func TestQTQuotaEnforcement(t *testing.T) {
}

func TestHeadroom(t *testing.T) {
leafQT := newQueueTracker("root.parent", "leaf")

leafMaxRes, err := resources.NewResourceFromConf(map[string]string{"mem": "60M", "vcore": "60"})
if err != nil {
t.Errorf("new resource create returned error or wrong resource: error %t, res %v", err, leafMaxRes)
}

parentQT := newQueueTracker("root", "parent")
parentMaxRes := leafMaxRes.Clone()
resources.Multiply(parentMaxRes, 2)

rootQT := newQueueTracker("", "root")

parentQT.childQueueTrackers["leaf"] = leafQT
rootQT.childQueueTrackers["parent"] = parentQT

// Not even a single queue has been configured with max resource
headroom := rootQT.headroom("root.parent.leaf")
assert.Equal(t, resources.Equals(headroom, nil), true)

leafQT.maxResources = leafMaxRes
parentQT.maxResources = parentMaxRes
var nilResource *resources.Resource
path := "root.parent.leaf"
hierarchy := strings.Split(path, configs.DOT)

// nothing exists make sure the hierarchy gets created
root := newRootQueueTracker()
parent := root.getChildQueueTracker("root.parent")
assert.Assert(t, parent != nil, "parent queue tracker should have been created")
leaf := root.getChildQueueTracker(path)
assert.Assert(t, leaf != nil, "leaf queue tracker should have been created")

// auto created trackers no max resource set
headroom := root.headroom(hierarchy)
assert.Equal(t, headroom, nilResource, "auto create: expected nil resource")

// prep resources to set as usage and max
usage, err := resources.NewResourceFromConf(map[string]string{"mem": "10M", "vcore": "10"})
assert.NilError(t, err, "usage: new resource create returned error")
double := resources.Multiply(usage, 2)
leaf.maxResources = double
parent.maxResources = resources.Multiply(double, 2)

// headroom should be equal to max cap of leaf queue as there is no usage so far
headroom = rootQT.headroom("root.parent.leaf")
assert.Equal(t, resources.Equals(headroom, leafMaxRes), true)

leafResUsage, err := resources.NewResourceFromConf(map[string]string{"mem": "30M", "vcore": "30"})
if err != nil {
t.Errorf("new resource create returned error or wrong resource: error %t, res %v", err, leafResUsage)
}
leafQT.resourceUsage = leafResUsage
headroom = root.headroom(hierarchy)
assert.Assert(t, resources.Equals(headroom, double), "headroom not leaf max")

// headroom should be equal to sub(max cap of leaf queue - resource usage) as there is some usage
headroom = rootQT.headroom("root.parent.leaf")
assert.Equal(t, resources.Equals(headroom, leafResUsage), true)

leafQT.maxResources = resources.Multiply(leafMaxRes, 2)
parentQT.maxResources = leafMaxRes

// headroom should be equal to min (leaf max resources, parent resources)
headroom = rootQT.headroom("root.parent.leaf")
assert.Equal(t, resources.Equals(headroom, leafMaxRes), true)

parentQT.maxResources = resources.NewResource()

// headroom should be equal to sub(max cap of leaf queue - resource usage) as there is some usage in leaf and max res of both root and parent is nil
headroom = rootQT.headroom("root.parent.leaf")
assert.Equal(t, resources.Equals(headroom, resources.Add(leafMaxRes, leafResUsage)), true)

rootQT.maxResources = leafMaxRes

// headroom should be equal to min ( (sub(max cap of leaf queue - resource usage), root resources) as there is some usage in leaf
// and max res of parent is nil
headroom = rootQT.headroom("root.parent.leaf")
assert.Equal(t, resources.Equals(headroom, leafMaxRes), true)
leaf.resourceUsage = usage
headroom = root.headroom(hierarchy)
assert.Assert(t, resources.Equals(headroom, usage), "headroom should be same as usage")

// headroom should be equal to min headroom of parent and leaf: parent has none so zero
parent.maxResources = double
parent.resourceUsage = double
headroom = root.headroom(hierarchy)
assert.Assert(t, resources.IsZero(headroom), "leaf check: parent should have no headroom")

headroom = root.headroom(hierarchy[:2])
assert.Assert(t, resources.IsZero(headroom), "parent check: parent should have no headroom")

// reset usage for the parent
parent.resourceUsage = resources.NewResource()
// set a different type in the parent max and check it is in the headroom
var single, other *resources.Resource
single, err = resources.NewResourceFromConf(map[string]string{"gpu": "1"})
assert.NilError(t, err, "single: new resource create returned error")
parent.maxResources = single
single, err = resources.NewResourceFromConf(map[string]string{"gpu": "1"})
assert.NilError(t, err, "single: new resource create returned error")
combined := resources.Add(usage, single)
headroom = root.headroom(hierarchy)
assert.Assert(t, resources.Equals(headroom, combined), "headroom should be same as combined")

// this "other" resource should be completely ignored as it has no limit
other, err = resources.NewResourceFromConf(map[string]string{"unknown": "100"})
assert.NilError(t, err, "single: new resource create returned error")
parent.resourceUsage = other
root.resourceUsage = other
headroom = root.headroom(hierarchy)
assert.Assert(t, resources.Equals(headroom, combined), "headroom should be same as combined")
}

func getQTResource(qt *QueueTracker) map[string]*resources.Resource {
Expand Down
4 changes: 3 additions & 1 deletion pkg/scheduler/ugm/user_tracker.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,13 @@
package ugm

import (
"strings"
"sync"

"go.uber.org/zap"

"github.com/apache/yunikorn-core/pkg/common"
"github.com/apache/yunikorn-core/pkg/common/configs"
"github.com/apache/yunikorn-core/pkg/common/resources"
"github.com/apache/yunikorn-core/pkg/log"
"github.com/apache/yunikorn-core/pkg/webservice/dao"
Expand Down Expand Up @@ -124,7 +126,7 @@ func (ut *UserTracker) setLimits(queuePath string, resource *resources.Resource,
func (ut *UserTracker) headroom(queuePath string) *resources.Resource {
ut.Lock()
defer ut.Unlock()
return ut.queueTracker.headroom(queuePath)
return ut.queueTracker.headroom(strings.Split(queuePath, configs.DOT))
}

func (ut *UserTracker) GetUserResourceUsageDAOInfo() *dao.UserResourceUsageDAOInfo {
Expand Down

0 comments on commit f83134b

Please sign in to comment.