-
Notifications
You must be signed in to change notification settings - Fork 295
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Refactor Preemption to interact with Quota/Usage through ClusterQueueSnapshot interface #2595
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -467,21 +467,17 @@ func cqHeapFromCandidates(candidates []*workload.Info, firstOnly bool, snapshot | |
return cqHeap | ||
} | ||
|
||
type resourcesPerFlavor map[kueue.ResourceFlavorReference]sets.Set[corev1.ResourceName] | ||
type resourcesPerFlavor = sets.Set[resources.FlavorResource] | ||
|
||
func resourcesRequiringPreemption(assignment flavorassigner.Assignment) resourcesPerFlavor { | ||
resPerFlavor := make(resourcesPerFlavor) | ||
resPerFlavor := sets.New[resources.FlavorResource]() | ||
for _, ps := range assignment.PodSets { | ||
for res, flvAssignment := range ps.Flavors { | ||
// assignments with NoFit mode wouldn't enter the preemption path. | ||
if flvAssignment.Mode != flavorassigner.Preempt { | ||
continue | ||
} | ||
if resPerFlavor[flvAssignment.Name] == nil { | ||
resPerFlavor[flvAssignment.Name] = sets.New(res) | ||
} else { | ||
resPerFlavor[flvAssignment.Name].Insert(res) | ||
} | ||
resPerFlavor.Insert(resources.FlavorResource{Flavor: flvAssignment.Name, Resource: res}) | ||
} | ||
} | ||
return resPerFlavor | ||
|
@@ -543,15 +539,9 @@ func cqIsBorrowing(cq *cache.ClusterQueueSnapshot, resPerFlv resourcesPerFlavor) | |
if cq.Cohort == nil { | ||
return false | ||
} | ||
for _, rg := range cq.ResourceGroups { | ||
for _, fName := range rg.Flavors { | ||
fUsage := cq.Usage[fName] | ||
for rName := range resPerFlv[fName] { | ||
quota := cq.QuotaFor(resources.FlavorResource{Flavor: fName, Resource: rName}) | ||
if fUsage[rName] > quota.Nominal { | ||
return true | ||
} | ||
} | ||
for fr := range resPerFlv { | ||
if cq.Borrowing(fr) { | ||
return true | ||
} | ||
} | ||
return false | ||
|
@@ -560,7 +550,7 @@ func cqIsBorrowing(cq *cache.ClusterQueueSnapshot, resPerFlv resourcesPerFlavor) | |
func workloadUsesResources(wl *workload.Info, resPerFlv resourcesPerFlavor) bool { | ||
for _, ps := range wl.TotalRequests { | ||
for res, flv := range ps.Flavors { | ||
if resPerFlv[flv].Has(res) { | ||
if resPerFlv.Has(resources.FlavorResource{Flavor: flv, Resource: res}) { | ||
return true | ||
} | ||
} | ||
|
@@ -572,58 +562,21 @@ func workloadUsesResources(wl *workload.Info, resPerFlv resourcesPerFlavor) bool | |
// requestable resources and simulated usage of the ClusterQueue and its cohort, | ||
// if it belongs to one. | ||
func workloadFits(wlReq resources.FlavorResourceQuantitiesFlat, cq *cache.ClusterQueueSnapshot, allowBorrowing bool) bool { | ||
for _, rg := range cq.ResourceGroups { | ||
for _, fName := range rg.Flavors { | ||
cqResUsage := cq.Usage[fName] | ||
for rName := range rg.CoveredResources { | ||
resource := cq.QuotaFor(resources.FlavorResource{Flavor: fName, Resource: rName}) | ||
rReq, found := wlReq[resources.FlavorResource{Flavor: fName, Resource: rName}] | ||
if !found { | ||
// Workload doesn't request this FlavorResource. | ||
continue | ||
} | ||
|
||
if cq.Cohort == nil || !allowBorrowing { | ||
if cqResUsage[rName]+rReq > resource.Nominal { | ||
return false | ||
} | ||
} else { | ||
// When resource.BorrowingLimit == nil there is no borrowing | ||
// limit, so we can skip the check. | ||
if resource.BorrowingLimit != nil { | ||
if cqResUsage[rName]+rReq > resource.Nominal+*resource.BorrowingLimit { | ||
return false | ||
} | ||
} | ||
} | ||
|
||
if cq.Cohort != nil { | ||
cohortResUsage := cq.UsedCohortQuota(fName, rName) | ||
requestableQuota := cq.RequestableCohortQuota(fName, rName) | ||
if cohortResUsage+rReq > requestableQuota { | ||
return false | ||
} | ||
} | ||
} | ||
for fr, v := range wlReq { | ||
if !allowBorrowing && cq.BorrowingWith(fr, v) { | ||
return false | ||
} | ||
if v > cq.Available(fr) { | ||
return false | ||
} | ||
} | ||
return true | ||
} | ||
|
||
func queueUnderNominalInResourcesNeedingPreemption(resPerFlv resourcesPerFlavor, cq *cache.ClusterQueueSnapshot) bool { | ||
for _, rg := range cq.ResourceGroups { | ||
for _, fName := range rg.Flavors { | ||
flvReq, found := resPerFlv[fName] | ||
if !found { | ||
// Workload doesn't request this flavor. | ||
continue | ||
} | ||
cqResUsage := cq.Usage[fName] | ||
for rName := range flvReq { | ||
if cqResUsage[rName] >= cq.QuotaFor(resources.FlavorResource{Flavor: fName, Resource: rName}).Nominal { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. note to reviewers: this There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Actually, a more accurate check would be whether the usage plus the resources for the incoming workload would be borrowing. If it's not borrowing, then this means that this CQ is preempting to reclaim quota, then it is allowed to preempt other workloads in the cohort. Otherwise, it should only be allowed to preempt workloads within its CQ or those below the threshold. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If there's a bug in the accounting, let's fix it in a follow-up PR, as this PR is intended to cleanup existing code without any change in behavior There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I think ack, I remember this was quite complex, so I would prefer to keep the logic in this PR, and do a dedicated one for fix if needed. I will yet look a bit into this. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Folded the two functions into one #2595 (comment) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've met with @gabesaba and agreed that he will try to adjust this refactoring PR not to change any logic.We will have a dedicated follow up to adjust the logic if needed. We will then consider changing <, with <=, or the idea from #2595 (comment). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. Unfolded the functions, and added back in the cohort == nil check. |
||
return false | ||
} | ||
} | ||
for fr := range resPerFlv { | ||
if cq.Usage.For(fr) >= cq.QuotaFor(fr).Nominal { | ||
return false | ||
} | ||
} | ||
return true | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
can we get rid of this check? if so, can fold
queueUnderNominalInResourcesNeedingPreemption
into this function. unit tests still pass after its removalThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, I think the check can be removed, because this logic is only called for CQs that belong to the same cohort as the preempting one.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Replaced usages of
queueUnderNominalInResourcesNeedingPreemption
with!cqIsBorrowing