Skip to content

Commit

Permalink
Formalize mechanism for recording/managing benchmark results.
Browse files Browse the repository at this point in the history
This patch defines new types and mechanisms for managing benchmark
results using a channel-based approach, as the previous
gmeasure.Stopwatch-based approach did not provide a mechanism
for associating operations which are part of a larger lifecycle
being benchmarked. (e.g. container CRUD operations)

Signed-off-by: Nashwan Azhari <nazhari@cloudbasesolutions.com>
  • Loading branch information
aznashwan committed Feb 15, 2022
1 parent 404ac96 commit 0153cf3
Show file tree
Hide file tree
Showing 6 changed files with 333 additions and 82 deletions.
1 change: 1 addition & 0 deletions cmd/critest/cri_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@ func TestCRISuite(t *testing.T) {
flag.Set("ginkgo.focus", "benchmark")
flag.Set("ginkgo.succinct", "true")
} else {
// Skip benchmark measurements for validation tests.
flag.Set("ginkgo.skipMeasurements", "true")
}
if *parallel > 1 {
Expand Down
26 changes: 26 additions & 0 deletions docs/benchmark.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,32 @@ git clone https://github.com/kubernetes-sigs/cri-tools -b release-1.9 $GOPATH/sr

Before running the test, you need to _ensure that the CRI server under test is running and listening on a Unix socket_ or a Windows TCP socket. Because the benchmark tests are designed to request changes (e.g., create/delete) to the containers and verify that correct status is reported, they expect to be the only user of the CRI server. Please make sure that 1) there are no existing CRI-managed containers running on the node, and 2) no other processes (e.g., Kubelet) will interfere with the tests.

### Defining benchmarking parameters

You can optionally specify some parameters detailing how benchmarks should be run.

```yaml
# The number of container lifecycle benchmarks to run:
containersNumber: 100

# The number of container lifecycle benchmarks to run in parallel.
# The total number of samples will be floor(containersNumber / containersNumberParallel)
containersNumberParallel: 2


# The number of pod lifecycle benchmarks to run:
podsNumber: 1000
# The number of pod lifecycle benchmarks to run in parallel.
# The total number of samples will be floor(podsNumber / podsNumberParallel)
podsNumberParallel: 1
```
### Run
```sh
critest -benchmark
[--benchmarking-params-file /path/to/params.yml]
[--benchmarking-output-dir /path/to/outdir/]
```

This will
Expand All @@ -45,5 +67,9 @@ critest connects to Unix: `unix:///var/run/dockershim.sock` or Windows: `tcp://l
- `-ginkgo.focus`: Only run the tests that match the regular expression.
- `-image-endpoint`: Set the endpoint of image service. Same with runtime-endpoint if not specified.
- `-runtime-endpoint`: Set the endpoint of runtime service. Default to Unix: `unix:///var/run/dockershim.sock` or Windows: `tcp://localhost:3735`.
- `-benchmarking-params-file`: optional path to a YAML file containing parameters describing which
benchmarks should be run.
- `-benchmarking-output-dir`: optional path to a pre-existing directory in which to write JSON
files detailing the results of the benchmarks.
- `-ginkgo.skip`: Skip the tests that match the regular expression.
- `-h`: Show help and all supported options.
11 changes: 3 additions & 8 deletions pkg/benchmark/benchmark.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,9 @@ import (
. "github.com/onsi/gomega"
)

// Transforms a slice of `time.Duration`s into their `int64` nanosecond representations.
func getNanosecondsForDurations(durations []time.Duration) []int64 {
var ns []int64
for _, duration := range durations {
ns = append(ns, duration.Nanoseconds())
}
return ns
}
const (
defaultOperationTimes int = 20
)

// TestPerformance checks configuration parameters (specified through flags) and then runs
// benchmark tests using the Ginkgo runner.
Expand Down
103 changes: 66 additions & 37 deletions pkg/benchmark/container.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
Copyright 2021 The Kubernetes Authors.
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
Expand All @@ -17,22 +17,17 @@ limitations under the License.
package benchmark

import (
"encoding/json"
"io/ioutil"
"path"
"time"

"github.com/golang/glog"
"github.com/kubernetes-sigs/cri-tools/pkg/framework"
. "github.com/onsi/ginkgo"
"github.com/onsi/gomega/gmeasure"
internalapi "k8s.io/cri-api/pkg/apis"
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
)

type ContainerExperimentData struct {
CreateContainer, StatusContainer, StopContainer, RemoveContainer, StartContainer []int64
}

var _ = framework.KubeDescribe("Container", func() {
f := framework.NewDefaultCRIFramework()

Expand All @@ -46,75 +41,109 @@ var _ = framework.KubeDescribe("Container", func() {

Context("benchmark about operations on Container", func() {
It("benchmark about basic operations on Container", func() {
// Setup sampling config from TestContext:
samplingConfig := gmeasure.SamplingConfig{
N: framework.TestContext.BenchmarkingParams.ContainersNumber,
NumParallel: framework.TestContext.BenchmarkingParams.ContainersNumberParallel,
}
if samplingConfig.N < 1 {
samplingConfig.N = 1
}
if samplingConfig.NumParallel < 1 {
samplingConfig.NumParallel = 1
}

// Setup results reporting channel:
resultsSet := LifecycleBenchmarksResultsSet{
OperationsNames: []string{"CreateContainer", "StartContainer", "StatusContainer", "StopContainer", "RemoveContainer"},
NumParallel: samplingConfig.NumParallel,
Datapoints: make([]LifecycleBenchmarkDatapoint, 0),
}
resultsManager := NewLifecycleBenchmarksResultsManager(
resultsSet,
60,
)
resultsChannel := resultsManager.StartResultsConsumer()

experiment := gmeasure.NewExperiment("ContainerOps")
experiment.Sample(func(idx int) {
var podID string
var podConfig *runtimeapi.PodSandboxConfig
var containerID string
var lastStartTime, lastEndTime int64
var err error
durations := make([]int64, len(resultsSet.OperationsNames))

podID, podConfig = framework.CreatePodSandboxForContainer(rc)

By("CreatingContainer")
stopwatch := experiment.NewStopwatch()
stopwatch.Reset()
startTime := time.Now().UnixNano()
lastStartTime = startTime
containerID = framework.CreateDefaultContainer(rc, ic, podID, podConfig, "Benchmark-container-")
stopwatch.Record("CreateContainer")
lastEndTime = time.Now().UnixNano()
durations[0] = lastEndTime - lastStartTime

By("StartingContainer")
stopwatch.Reset()
lastStartTime = time.Now().UnixNano()
err = rc.StartContainer(containerID)
stopwatch.Record("StartContainer")
lastEndTime = time.Now().UnixNano()
durations[1] = lastEndTime - lastStartTime
framework.ExpectNoError(err, "failed to start Container: %v", err)

By("ContainerStatus")
stopwatch.Reset()
lastStartTime = time.Now().UnixNano()
_, err = rc.ContainerStatus(containerID)
stopwatch.Record("StatusContainer")
lastEndTime = time.Now().UnixNano()
durations[2] = lastEndTime - lastStartTime
framework.ExpectNoError(err, "failed to get Container status: %v", err)

By("ContainerStop")
stopwatch.Reset()
lastStartTime = time.Now().UnixNano()
err = rc.StopContainer(containerID, framework.DefaultStopContainerTimeout)
stopwatch.Record("StopContainer")
lastEndTime = time.Now().UnixNano()
durations[3] = lastEndTime - lastStartTime
framework.ExpectNoError(err, "failed to stop Container: %v", err)

By("ContainerRemove")
stopwatch.Reset()
lastStartTime = time.Now().UnixNano()
err = rc.RemoveContainer(containerID)
stopwatch.Record("RemoveContainer")
lastEndTime = time.Now().UnixNano()
durations[4] = lastEndTime - lastStartTime
framework.ExpectNoError(err, "failed to remove Container: %v", err)

res := LifecycleBenchmarkDatapoint{
SampleIndex: idx,
StartTime: startTime,
EndTime: lastEndTime,
OperationsDurationsNs: durations,
MetaInfo: map[string]string{"podId": podID, "containerId": containerID},
}
resultsChannel <- &res

By("stop PodSandbox")
rc.StopPodSandbox(podID)
By("delete PodSandbox")
rc.RemovePodSandbox(podID)

}, gmeasure.SamplingConfig{N: framework.TestContext.BenchmarkingParams.ContainersNumber, NumParallel: framework.TestContext.BenchmarkingParams.ContainersNumberParallel})
}, samplingConfig)

data := ContainerExperimentData{
CreateContainer: getNanosecondsForDurations(experiment.Get("CreateContainer").Durations),
StartContainer: getNanosecondsForDurations(experiment.Get("StartContainer").Durations),
StatusContainer: getNanosecondsForDurations(experiment.Get("StatusContainer").Durations),
StopContainer: getNanosecondsForDurations(experiment.Get("StopContainer").Durations),
RemoveContainer: getNanosecondsForDurations(experiment.Get("RemoveContainer").Durations),
// Send nil and give the manager a minute to process any already-queued results:
resultsChannel <- nil
err := resultsManager.AwaitAllResults(60)
if err != nil {
glog.Errorf("Results manager failed to await all results: %s", err)
}

if framework.TestContext.BenchmarkingOutputDir != "" {
filepath := path.Join(framework.TestContext.BenchmarkingOutputDir, "container_benchmark_data.json")
data, err := json.MarshalIndent(data, "", " ")
if err == nil {
err = ioutil.WriteFile(filepath, data, 0644)
if err != nil {
glog.Errorf("Failed to write container benchmark data: %v", filepath)
}
} else {
glog.Errorf("Failed to serialize benchmark data: %v", err)
filepath := path.Join(framework.TestContext.BenchmarkingOutputDir, "newf_container_benchmark_data.json")
err = resultsManager.WriteResultsFile(filepath)
if err != nil {
glog.Errorf("Error occurred while writing benchmark results to file %s: %s", filepath, err)
}
} else {
glog.Infof("No benchmarking output dir provided, skipping writing benchmarking resulsts.")
glog.Infof("No benchmarking output dir provided, skipping writing benchmarking results file.")
glog.Infof("Benchmark results were: %+v", resultsManager.resultsSet)
}
})

})
})
97 changes: 60 additions & 37 deletions pkg/benchmark/pod.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
Copyright 2021 The Kubernetes Authors.
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
Expand All @@ -18,6 +18,7 @@ package benchmark

import (
"path"
"time"

"github.com/golang/glog"
"github.com/kubernetes-sigs/cri-tools/pkg/framework"
Expand All @@ -26,19 +27,8 @@ import (
"github.com/onsi/gomega/gmeasure"
internalapi "k8s.io/cri-api/pkg/apis"
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"

"encoding/json"
"io/ioutil"
)

const (
defaultOperationTimes int = 20
)

type ExperimentData struct {
CreatePod, StatusPod, StopPod, RemovePod []int64
}

var _ = framework.KubeDescribe("PodSandbox", func() {
f := framework.NewDefaultCRIFramework()

Expand All @@ -50,11 +40,36 @@ var _ = framework.KubeDescribe("PodSandbox", func() {

Context("benchmark about operations on PodSandbox", func() {
It("benchmark about lifecycle of PodSandbox", func() {
// Setup sampling config from TestContext:
samplingConfig := gmeasure.SamplingConfig{
N: framework.TestContext.BenchmarkingParams.PodsNumber,
NumParallel: framework.TestContext.BenchmarkingParams.PodsNumberParallel,
}
if samplingConfig.N < 1 {
samplingConfig.N = 1
}
if samplingConfig.NumParallel < 1 {
samplingConfig.NumParallel = 1
}

// Setup results reporting channel:
resultsSet := LifecycleBenchmarksResultsSet{
OperationsNames: []string{"CreatePod", "StatusPod", "StopPod", "RemovePod"},
NumParallel: samplingConfig.NumParallel,
Datapoints: make([]LifecycleBenchmarkDatapoint, 0),
}
resultsManager := NewLifecycleBenchmarksResultsManager(
resultsSet,
60,
)
resultsChannel := resultsManager.StartResultsConsumer()

experiment := gmeasure.NewExperiment("PodLifecycle")
experiment.Sample(func(idx int) {
var lastStartTime, lastEndTime int64
var podID string
var err error
durations := make([]int64, len(resultsSet.OperationsNames))

podSandboxName := "PodSandbox-for-creating-performance-test-" + framework.NewUUID()
uid := framework.DefaultUIDPrefix + framework.NewUUID()
Expand All @@ -67,54 +82,62 @@ var _ = framework.KubeDescribe("PodSandbox", func() {
}

By("Creating a pod")
stopwatch := experiment.NewStopwatch()

startTime := time.Now().UnixNano()
lastStartTime = startTime
podID, err = c.RunPodSandbox(config, framework.TestContext.RuntimeHandler)
stopwatch.Record("CreatePod")
lastEndTime = time.Now().UnixNano()
durations[0] = lastEndTime - lastStartTime
framework.ExpectNoError(err, "failed to create PodSandbox: %v", err)

By("Get Pod status")
stopwatch.Reset()
lastStartTime = time.Now().UnixNano()
_, err = c.PodSandboxStatus(podID)
stopwatch.Record("StatusPod")
lastEndTime = time.Now().UnixNano()
durations[1] = lastEndTime - lastStartTime
framework.ExpectNoError(err, "failed to get PodStatus: %v", err)

By("Stop PodSandbox")
stopwatch.Reset()
lastStartTime = time.Now().UnixNano()
err = c.StopPodSandbox(podID)
stopwatch.Record("StopPod")
lastEndTime = time.Now().UnixNano()
durations[2] = lastEndTime - lastStartTime
framework.ExpectNoError(err, "failed to stop PodSandbox: %v", err)

By("Remove PodSandbox")
stopwatch.Reset()
lastStartTime = time.Now().UnixNano()
err = c.RemovePodSandbox(podID)
stopwatch.Record("RemovePod")
lastEndTime = time.Now().UnixNano()
durations[3] = lastEndTime - lastStartTime
framework.ExpectNoError(err, "failed to remove PodSandbox: %v", err)

}, gmeasure.SamplingConfig{N: framework.TestContext.BenchmarkingParams.PodsNumber, NumParallel: framework.TestContext.BenchmarkingParams.PodsNumberParallel})
res := LifecycleBenchmarkDatapoint{
StartTime: startTime,
EndTime: lastEndTime,
OperationsDurationsNs: durations,
MetaInfo: map[string]string{"podId": podID, "podSandboxName": podSandboxName},
}
resultsChannel <- &res

}, samplingConfig)

data := ExperimentData{
CreatePod: getNanosecondsForDurations(experiment.Get("CreatePod").Durations),
StatusPod: getNanosecondsForDurations(experiment.Get("StatusPod").Durations),
StopPod: getNanosecondsForDurations(experiment.Get("StopPod").Durations),
RemovePod: getNanosecondsForDurations(experiment.Get("RemovePod").Durations),
// Send nil and give the manager a minute to process any already-queued results:
resultsChannel <- nil
err := resultsManager.AwaitAllResults(60)
if err != nil {
glog.Errorf("Results manager failed to await all results: %s", err)
}

if framework.TestContext.BenchmarkingOutputDir != "" {
filepath := path.Join(framework.TestContext.BenchmarkingOutputDir, "pod_benchmark_data.json")
data, err := json.MarshalIndent(data, "", " ")
if err == nil {
err = ioutil.WriteFile(filepath, data, 0644)
if err != nil {
glog.Errorf("Failed to write container benchmark data: %v", filepath)
}
} else {
glog.Errorf("Failed to serialize benchmark data: %v", err)
filepath := path.Join(framework.TestContext.BenchmarkingOutputDir, "newf_pod_benchmark_data.json")
err = resultsManager.WriteResultsFile(filepath)
if err != nil {
glog.Errorf("Error occurred while writing benchmark results to file %s: %s", filepath, err)
}
} else {
glog.Infof("No benchmarking out dir provided, skipping writing benchmarking resulsts.")
glog.Infof("No benchmarking out dir provided, skipping writing benchmarking results.")
glog.Infof("Benchmark results were: %+v", resultsManager.resultsSet)
}
})
})

})
Loading

0 comments on commit 0153cf3

Please sign in to comment.