Skip to content

Commit

Permalink
bugfix: Allow for optional timeslicing configuration
Browse files Browse the repository at this point in the history
This fixes a vestigial bug from the introduction of MPS. Now that two timeslicing configurations are available, it's possible for MPS to be configured instead of timeslicing.

But since the TimeSlicing field was made non-optional - its existence is forced when unmarshalling, and then the parsing fails because no resources are specified under the empty TimeSlicing field.
  • Loading branch information
danielkleinstein authored Oct 27, 2024
1 parent 6decc15 commit 1c6e3fe
Show file tree
Hide file tree
Showing 6 changed files with 21 additions and 17 deletions.
8 changes: 6 additions & 2 deletions api/config/v1/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,9 +97,13 @@ func DisableResourceNamingInConfig(logger logger, config *Config) {
config.Resources.MIGs = nil

// Disable renaming / device selection in Sharing.TimeSlicing.Resources
config.Sharing.TimeSlicing.disableResoureRenaming(logger, "timeSlicing")
if (config.sharing.TimeSlicing != nil) {
config.Sharing.TimeSlicing.disableResoureRenaming(logger, "timeSlicing")
}
// Disable renaming / device selection in Sharing.MPS.Resources
config.Sharing.MPS.disableResoureRenaming(logger, "mps")
if (config.Sharing.MPS != nil) {
config.Sharing.MPS.disableResoureRenaming(logger, "mps")
}
}

// parseConfig parses a config file as either YAML of JSON and unmarshals it into a Config struct.
Expand Down
6 changes: 3 additions & 3 deletions api/config/v1/sharing.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ package v1
// Sharing encapsulates the set of sharing strategies that are supported.
type Sharing struct {
// TimeSlicing defines the set of replicas to be made for timeSlicing available resources.
TimeSlicing ReplicatedResources `json:"timeSlicing,omitempty" yaml:"timeSlicing,omitempty"`
TimeSlicing *ReplicatedResources `json:"timeSlicing,omitempty" yaml:"timeSlicing,omitempty"`
// MPS defines the set of replicas to be shared using MPS
MPS *ReplicatedResources `json:"mps,omitempty" yaml:"mps,omitempty"`
}
Expand All @@ -38,7 +38,7 @@ func (s *Sharing) SharingStrategy() SharingStrategy {
return SharingStrategyMPS
}

if s.TimeSlicing.isReplicated() {
if s.TimeSlicing != nil && s.TimeSlicing.isReplicated() {
return SharingStrategyTimeSlicing
}
return SharingStrategyNone
Expand All @@ -49,5 +49,5 @@ func (s *Sharing) ReplicatedResources() *ReplicatedResources {
if s.MPS != nil {
return s.MPS
}
return &s.TimeSlicing
return s.TimeSlicing
}
10 changes: 5 additions & 5 deletions internal/lm/mig-strategy_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ func TestMigStrategyNoneLabels(t *testing.T) {
devices: []resource.Device{
rt.NewFullGPU(),
},
timeSlicing: spec.ReplicatedResources{
timeSlicing: &spec.ReplicatedResources{
Resources: []spec.ReplicatedResource{
{
Name: "nvidia.com/gpu",
Expand All @@ -83,7 +83,7 @@ func TestMigStrategyNoneLabels(t *testing.T) {
rt.NewFullGPU(),
rt.NewFullGPU(),
},
timeSlicing: spec.ReplicatedResources{
timeSlicing: &spec.ReplicatedResources{
Resources: []spec.ReplicatedResource{
{
Name: "nvidia.com/gpu",
Expand All @@ -107,7 +107,7 @@ func TestMigStrategyNoneLabels(t *testing.T) {
devices: []resource.Device{
rt.NewMigEnabledDevice(),
},
timeSlicing: spec.ReplicatedResources{
timeSlicing: &spec.ReplicatedResources{
Resources: []spec.ReplicatedResource{
{
Name: "nvidia.com/gpu",
Expand All @@ -129,7 +129,7 @@ func TestMigStrategyNoneLabels(t *testing.T) {
rt.NewMigEnabledDevice(),
rt.NewMigEnabledDevice(),
},
timeSlicing: spec.ReplicatedResources{
timeSlicing: &spec.ReplicatedResources{
Resources: []spec.ReplicatedResource{
{
Name: "nvidia.com/gpu",
Expand All @@ -151,7 +151,7 @@ func TestMigStrategyNoneLabels(t *testing.T) {
rt.NewMigEnabledDevice(),
rt.NewFullGPU(),
},
timeSlicing: spec.ReplicatedResources{
timeSlicing: &spec.ReplicatedResources{
Resources: []spec.ReplicatedResource{
{
Name: "nvidia.com/gpu",
Expand Down
2 changes: 1 addition & 1 deletion internal/lm/nvml_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ func TestSharingLabeler(t *testing.T) {
description: "config with timeslicing replicas",
config: &spec.Config{
Sharing: spec.Sharing{
TimeSlicing: spec.ReplicatedResources{
TimeSlicing: &spec.ReplicatedResources{
Resources: []spec.ReplicatedResource{
{
Replicas: 2,
Expand Down
6 changes: 3 additions & 3 deletions internal/lm/resource_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ func TestGPUResourceLabeler(t *testing.T) {
description: "time-slicing ignores non-matching resource",
count: 1,
sharing: spec.Sharing{
TimeSlicing: spec.ReplicatedResources{
TimeSlicing: &spec.ReplicatedResources{
Resources: []spec.ReplicatedResource{
{
Name: "nvidia.com/not-gpu",
Expand All @@ -79,7 +79,7 @@ func TestGPUResourceLabeler(t *testing.T) {
description: "time-slicing appends suffix and doubles count",
count: 1,
sharing: spec.Sharing{
TimeSlicing: spec.ReplicatedResources{
TimeSlicing: &spec.ReplicatedResources{
Resources: []spec.ReplicatedResource{
{
Name: "nvidia.com/gpu",
Expand All @@ -103,7 +103,7 @@ func TestGPUResourceLabeler(t *testing.T) {
description: "time-slicing renamed does not append suffix and doubles count",
count: 1,
sharing: spec.Sharing{
TimeSlicing: spec.ReplicatedResources{
TimeSlicing: &spec.ReplicatedResources{
Resources: []spec.ReplicatedResource{
{
Name: "nvidia.com/gpu",
Expand Down
6 changes: 3 additions & 3 deletions internal/rm/rm_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ func TestValidateRequest(t *testing.T) {
{
description: "timeslicing with single device",
sharing: spec.Sharing{
TimeSlicing: spec.ReplicatedResources{
TimeSlicing: &spec.ReplicatedResources{
Resources: []spec.ReplicatedResource{
{
Name: "nvidia.com/gpu",
Expand All @@ -73,7 +73,7 @@ func TestValidateRequest(t *testing.T) {
{
description: "timeslicing with two devices",
sharing: spec.Sharing{
TimeSlicing: spec.ReplicatedResources{
TimeSlicing: &spec.ReplicatedResources{
Resources: []spec.ReplicatedResource{
{
Name: "nvidia.com/gpu",
Expand All @@ -93,7 +93,7 @@ func TestValidateRequest(t *testing.T) {
{
description: "timeslicing with two devices -- failRequestsGreaterThanOne",
sharing: spec.Sharing{
TimeSlicing: spec.ReplicatedResources{
TimeSlicing: &spec.ReplicatedResources{
FailRequestsGreaterThanOne: true,
Resources: []spec.ReplicatedResource{
{
Expand Down

0 comments on commit 1c6e3fe

Please sign in to comment.