Skip to content

Commit

Permalink
feat(fio): add option to disable runtime (#1601)
Browse files Browse the repository at this point in the history
  • Loading branch information
emosbaugh authored Aug 22, 2024
1 parent ff31f5a commit 1b1efa1
Show file tree
Hide file tree
Showing 14 changed files with 288 additions and 15 deletions.
6 changes: 6 additions & 0 deletions config/crds/troubleshoot.sh_hostcollectors.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1337,6 +1337,12 @@ spec:
background IOPS feature if enabled, since those must be fixed at 4096.
format: int64
type: integer
runTime:
description: |-
Limit runtime. The test will run until it completes the configured I/O workload or until it
has run for this specified amount of time, whichever occurs first. When the unit is omitted,
the value is interpreted in seconds. Defaults to 120 seconds. Set to "0" to disable.
type: string
sync:
description: Whether to call sync on the file after each
write. Does not apply to background IOPS task.
Expand Down
12 changes: 12 additions & 0 deletions config/crds/troubleshoot.sh_hostpreflights.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1337,6 +1337,12 @@ spec:
background IOPS feature if enabled, since those must be fixed at 4096.
format: int64
type: integer
runTime:
description: |-
Limit runtime. The test will run until it completes the configured I/O workload or until it
has run for this specified amount of time, whichever occurs first. When the unit is omitted,
the value is interpreted in seconds. Defaults to 120 seconds. Set to "0" to disable.
type: string
sync:
description: Whether to call sync on the file after each
write. Does not apply to background IOPS task.
Expand Down Expand Up @@ -1819,6 +1825,12 @@ spec:
background IOPS feature if enabled, since those must be fixed at 4096.
format: int64
type: integer
runTime:
description: |-
Limit runtime. The test will run until it completes the configured I/O workload or until it
has run for this specified amount of time, whichever occurs first. When the unit is omitted,
the value is interpreted in seconds. Defaults to 120 seconds. Set to "0" to disable.
type: string
sync:
description: Whether to call sync on the file after each
write. Does not apply to background IOPS task.
Expand Down
6 changes: 6 additions & 0 deletions config/crds/troubleshoot.sh_preflights.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18723,6 +18723,12 @@ spec:
background IOPS feature if enabled, since those must be fixed at 4096.
format: int64
type: integer
runTime:
description: |-
Limit runtime. The test will run until it completes the configured I/O workload or until it
has run for this specified amount of time, whichever occurs first. When the unit is omitted,
the value is interpreted in seconds. Defaults to 120 seconds. Set to "0" to disable.
type: string
sync:
description: Whether to call sync on the file after each
write. Does not apply to background IOPS task.
Expand Down
6 changes: 6 additions & 0 deletions config/crds/troubleshoot.sh_remotecollectors.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,12 @@ spec:
background IOPS feature if enabled, since those must be fixed at 4096.
format: int64
type: integer
runTime:
description: |-
Limit runtime. The test will run until it completes the configured I/O workload or until it
has run for this specified amount of time, whichever occurs first. When the unit is omitted,
the value is interpreted in seconds. Defaults to 120 seconds. Set to "0" to disable.
type: string
sync:
description: Whether to call sync on the file after each
write. Does not apply to background IOPS task.
Expand Down
6 changes: 6 additions & 0 deletions config/crds/troubleshoot.sh_supportbundles.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19951,6 +19951,12 @@ spec:
background IOPS feature if enabled, since those must be fixed at 4096.
format: int64
type: integer
runTime:
description: |-
Limit runtime. The test will run until it completes the configured I/O workload or until it
has run for this specified amount of time, whichever occurs first. When the unit is omitted,
the value is interpreted in seconds. Defaults to 120 seconds. Set to "0" to disable.
type: string
sync:
description: Whether to call sync on the file after each
write. Does not apply to background IOPS task.
Expand Down
13 changes: 10 additions & 3 deletions pkg/analyze/host_filesystem_performance.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,11 +74,18 @@ func (a *AnalyzeHostFilesystemPerformance) Analyze(
return nil, errors.Wrapf(err, "failed to unmarshal fio results from %s", name)
}

if len(fioResult.Jobs) == 0 {
return nil, errors.Errorf("no jobs found in fio results from %s", name)
var job *collect.FioJobs
for _, j := range fioResult.Jobs {
if j.JobName == collect.FioJobName {
job = &j
break
}
}
if job == nil {
return nil, errors.Errorf("no job named 'fsperf' found in fio results from %s", name)
}

fioWriteLatency := fioResult.Jobs[0].Sync
fioWriteLatency := job.Sync

fsPerf := fioWriteLatency.FSPerfResults()
if err := json.Unmarshal(contents, &fsPerf); err != nil {
Expand Down
5 changes: 5 additions & 0 deletions pkg/apis/troubleshoot/v1beta2/hostcollector_shared.go
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,11 @@ type FilesystemPerformance struct {
// Whether to call datasync on the file after each write. Skipped if Sync is also true. Does not
// apply to background IOPS task.
Datasync bool `json:"datasync,omitempty"`
// Limit runtime. The test will run until it completes the configured I/O workload or until it
// has run for this specified amount of time, whichever occurs first. When the unit is omitted,
// the value is interpreted in seconds. Defaults to 120 seconds. Set to "0" to disable.
RunTime *string `json:"runTime,omitempty"`

// Total timeout, including background IOPS setup and warmup if enabled.
Timeout string `json:"timeout,omitempty"`

Expand Down
4 changes: 4 additions & 0 deletions pkg/apis/troubleshoot/v1beta2/remote_collector_shared.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,10 @@ type RemoteFilesystemPerformance struct {
Datasync bool `json:"datasync,omitempty"`
// Total timeout, including background IOPS setup and warmup if enabled.
Timeout string `json:"timeout,omitempty"`
// Limit runtime. The test will run until it completes the configured I/O workload or until it
// has run for this specified amount of time, whichever occurs first. When the unit is omitted,
// the value is interpreted in seconds. Defaults to 120 seconds. Set to "0" to disable.
RunTime *string `json:"runTime,omitempty"`

// Enable the background IOPS feature.
EnableBackgroundIOPS bool `json:"enableBackgroundIOPS"`
Expand Down
10 changes: 10 additions & 0 deletions pkg/apis/troubleshoot/v1beta2/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

36 changes: 33 additions & 3 deletions pkg/collect/host_filesystem_performance.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@ import (
"k8s.io/klog/v2"
)

const (
FioJobName = "fsperf"
DefaultFioRunTime = "120"
)

type Durations []time.Duration

func (d Durations) Len() int {
Expand Down Expand Up @@ -367,22 +372,47 @@ func parseCollectorOptions(hostCollector *troubleshootv1beta2.FilesystemPerforma
return nil, nil, errors.New("Directory is required to collect filesystem performance info")
}

runtime, err := getFioRuntime(hostCollector.RunTime)
if err != nil {
return nil, nil, err
}

latencyBenchmarkOptions := FioJobOptions{
RW: "write",
IOEngine: "sync",
FDataSync: "1",
Directory: hostCollector.Directory,
Size: strconv.FormatUint(fileSize, 10),
BS: strconv.FormatUint(operationSize, 10),
Name: "fsperf",
RunTime: "120",
Name: FioJobName,
RunTime: runtime,
}

command := buildFioCommand(latencyBenchmarkOptions)

return command, &latencyBenchmarkOptions, nil
}

// getFioRuntime returns the runTime value or the default if runTime is nil, empty or <= 0.
// This attepmts to maintain backwards compatibility prior to adding runTime to the collector spec,
// defaulting to 120 seconds.
func getFioRuntime(runTime *string) (string, error) {
if runTime == nil {
return DefaultFioRunTime, nil
}
if *runTime == "" {
return "", nil // disable
}
i, err := strconv.Atoi(*runTime)
if err != nil {
return "", errors.Wrapf(err, "failed to parse runTime %q", *runTime)
}
if i <= 0 {
return "", nil // disable
}
return *runTime, nil
}

func buildFioCommand(opts FioJobOptions) []string {
command := []string{"fio"}
v := reflect.ValueOf(opts)
Expand All @@ -407,7 +437,7 @@ func collectFioResults(ctx context.Context, hostCollector *troubleshootv1beta2.F
}

klog.V(2).Infof("collecting fio results: %s", strings.Join(command, " "))
output, err := exec.CommandContext(ctx, command[0], command[1:]...).Output()
output, err := exec.CommandContext(ctx, command[0], command[1:]...).Output() // #nosec G204
if err != nil {
if exitErr, ok := err.(*exec.ExitError); ok {
if exitErr.ExitCode() == 1 {
Expand Down
33 changes: 24 additions & 9 deletions pkg/collect/host_filesystem_performance_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,13 @@ func collectHostFilesystemPerformance(hostCollector *troubleshootv1beta2.Filesys
}
timeout = d
}
ctx, cancel := context.WithTimeout(context.Background(), timeout)
defer cancel()
timeoutCtx, timeoutCancel := context.WithTimeout(context.Background(), timeout)
defer timeoutCancel()

// Start a new context for the fio collector and handle the timeout separately so we can
// distinguish between the timeout and a command failure in the analyzer.
collectCtx, collectCancel := context.WithCancel(context.Background())
defer collectCancel()

collectorName := hostCollector.CollectorName
if collectorName == "" {
Expand All @@ -53,7 +58,7 @@ func collectHostFilesystemPerformance(hostCollector *troubleshootv1beta2.Filesys
jobs := hostCollector.BackgroundReadIOPSJobs + hostCollector.BackgroundWriteIOPSJobs
done := make(chan bool, jobs)
defer func() {
cancel()
collectCancel()
for i := 0; i < jobs; i++ {
<-done
}
Expand All @@ -65,24 +70,34 @@ func collectHostFilesystemPerformance(hostCollector *troubleshootv1beta2.Filesys
jobs: hostCollector.BackgroundReadIOPSJobs,
directory: hostCollector.Directory,
}
backgroundIOPS(ctx, opts, done)
backgroundIOPS(collectCtx, opts, done)

opts = backgroundIOPSOpts{
read: false,
iopsLimit: hostCollector.BackgroundWriteIOPS,
jobs: hostCollector.BackgroundWriteIOPSJobs,
directory: hostCollector.Directory,
}
backgroundIOPS(ctx, opts, done)
backgroundIOPS(collectCtx, opts, done)

time.Sleep(time.Second * time.Duration(hostCollector.BackgroundIOPSWarmupSeconds))
}

var fioResult *FioResult

fioResult, err := collectFioResults(ctx, hostCollector)
if err != nil {
return nil, errors.Wrap(err, "failed to collect fio results")
errCh := make(chan error, 1)
go func() {
var err error
fioResult, err = collectFioResults(collectCtx, hostCollector)
errCh <- err
}()

select {
case <-timeoutCtx.Done():
return nil, errors.New("timeout")
case err := <-errCh:
if err != nil {
return nil, errors.Wrap(err, "failed to collect fio results")
}
}

b, err := json.Marshal(fioResult)
Expand Down
Loading

0 comments on commit 1b1efa1

Please sign in to comment.