diff --git a/cmd/katalyst-agent/app/options/qrm/io_plugin.go b/cmd/katalyst-agent/app/options/qrm/io_plugin.go index f0bf4afe8..81000423e 100644 --- a/cmd/katalyst-agent/app/options/qrm/io_plugin.go +++ b/cmd/katalyst-agent/app/options/qrm/io_plugin.go @@ -41,6 +41,7 @@ type WritebackThrottlingOption struct { type IOCostOption struct { EnableSettingIOCost bool + CheckWBTDisabled bool IOCostQoSConfigFile string IOCostModelConfigFile string } @@ -62,6 +63,7 @@ func NewIOOptions() *IOOptions { }, IOCostOption: IOCostOption{ EnableSettingIOCost: false, + CheckWBTDisabled: true, IOCostQoSConfigFile: "", IOCostModelConfigFile: "", }, @@ -88,6 +90,8 @@ func (o *IOOptions) AddFlags(fss *cliflag.NamedFlagSets) { o.WBTValueNVME, "writeback throttling value for NVME") fs.BoolVar(&o.EnableSettingIOCost, "enable-io-cost", o.EnableSettingIOCost, "if set it to true, io.cost setting will be executed") + fs.BoolVar(&o.CheckWBTDisabled, "check-wbt-disabled", + o.CheckWBTDisabled, "if set it to true, wbt should be disabled") fs.StringVar(&o.IOCostQoSConfigFile, "io-cost-qos-config-file", o.IOCostQoSConfigFile, "the absolute path of io.cost.qos qos config file") fs.StringVar(&o.IOCostModelConfigFile, "io-cost-model-config-file", @@ -107,6 +111,7 @@ func (o *IOOptions) ApplyTo(conf *qrmconfig.IOQRMPluginConfig) error { conf.WBTValueSSD = o.WBTValueSSD conf.WBTValueNVME = o.WBTValueNVME conf.EnableSettingIOCost = o.EnableSettingIOCost + conf.CheckWBTDisabled = o.CheckWBTDisabled conf.IOCostQoSConfigFile = o.IOCostQoSConfigFile conf.IOCostModelConfigFile = o.IOCostModelConfigFile conf.EnableSettingIOWeight = o.EnableSettingIOWeight diff --git a/pkg/agent/qrm-plugins/io/handlers/iocost/const.go b/pkg/agent/qrm-plugins/io/handlers/iocost/const.go index b0e0f3d18..0ad9f1508 100644 --- a/pkg/agent/qrm-plugins/io/handlers/iocost/const.go +++ b/pkg/agent/qrm-plugins/io/handlers/iocost/const.go @@ -29,6 +29,8 @@ const ( DevModelBcache DevModel = "bcache" DevModelDeviceMapper DevModel = "dm" + sysDiskPrefix = "/sys/block" + IOStatMetricCostVrate = "cost.vrate" MetricNameIOCostVrate = "iocost_vrate" diff --git a/pkg/agent/qrm-plugins/io/handlers/iocost/iocost_linux.go b/pkg/agent/qrm-plugins/io/handlers/iocost/iocost_linux.go index 64d6ba1ea..097dee6f8 100644 --- a/pkg/agent/qrm-plugins/io/handlers/iocost/iocost_linux.go +++ b/pkg/agent/qrm-plugins/io/handlers/iocost/iocost_linux.go @@ -21,6 +21,7 @@ package iocost import ( "fmt" + "io/ioutil" "path/filepath" "strconv" "sync" @@ -28,7 +29,9 @@ import ( "github.com/kubewharf/katalyst-core/pkg/config" coreconfig "github.com/kubewharf/katalyst-core/pkg/config" dynamicconfig "github.com/kubewharf/katalyst-core/pkg/config/agent/dynamic" + coreconsts "github.com/kubewharf/katalyst-core/pkg/consts" "github.com/kubewharf/katalyst-core/pkg/metaserver" + "github.com/kubewharf/katalyst-core/pkg/metaserver/agent/metric/helper" "github.com/kubewharf/katalyst-core/pkg/metrics" "github.com/kubewharf/katalyst-core/pkg/util/cgroup/common" cgcommon "github.com/kubewharf/katalyst-core/pkg/util/cgroup/common" @@ -236,6 +239,33 @@ func applyIOCostConfig(conf *config.Configuration, emitter metrics.MetricEmitter applyIOCostModelWithDefault(ioCostModelConfigs, devsIDToModel) } +func checkWBTDisabled(targetDiskType float64, diskPath string, emitter metrics.MetricEmitter, metaServer *metaserver.MetaServer) (bool, error) { + dir, err := ioutil.ReadDir(diskPath) + if err != nil { + general.Errorf("failed to readdir:%v, err:%v", sysDiskPrefix, err) + return false, err + } + for _, entry := range dir { + diskType, err := helper.GetDeviceMetric(metaServer.MetricsFetcher, emitter, coreconsts.MetricIODiskType, entry.Name()) + if err != nil { + general.Errorf("failed to read MetricIODiskType, err:%v", err) + return false, err + } + + if diskType == targetDiskType { + WBTValue, err := helper.GetDeviceMetric(metaServer.MetricsFetcher, emitter, coreconsts.MetricIODiskWBTValue, entry.Name()) + if err != nil { + general.Errorf("failed to read MetricIODiskWBTValue, err:%v", err) + return false, err + } + if WBTValue != 0 { + return false, nil + } + } + } + return true, nil +} + func SetIOCost(conf *coreconfig.Configuration, _ interface{}, _ *dynamicconfig.DynamicAgentConfiguration, @@ -265,6 +295,15 @@ func SetIOCost(conf *coreconfig.Configuration, return } + // Strict mode: checking wbt file. + if conf.CheckWBTDisabled { + disabled, err := checkWBTDisabled(coreconsts.DiskTypeHDD, sysDiskPrefix, emitter, metaServer) + if !disabled { + general.Infof("wbt for HDD disks should be disabled, err=%v", err) + return + } + } + if !cgcommon.CheckCgroup2UnifiedMode() { general.Infof("not in cgv2 environment, skip IOAsyncTaskFunc") return diff --git a/pkg/agent/qrm-plugins/io/handlers/iocost/iocost_linux_test.go b/pkg/agent/qrm-plugins/io/handlers/iocost/iocost_linux_test.go index 8e17c46a3..f26e4ff3b 100644 --- a/pkg/agent/qrm-plugins/io/handlers/iocost/iocost_linux_test.go +++ b/pkg/agent/qrm-plugins/io/handlers/iocost/iocost_linux_test.go @@ -133,6 +133,7 @@ func TestSetIOCost(t *testing.T) { IOQRMPluginConfig: &qrm.IOQRMPluginConfig{ IOCostOption: qrm.IOCostOption{ EnableSettingIOCost: true, + CheckWBTDisabled: true, }, }, }, @@ -411,3 +412,15 @@ func TestApplyIOCostQoSWithDefault(t *testing.T) { // Call the function under test applyIOCostQoSWithDefault(ioCostQoSConfigs, devsIDToModel) } + +func TestCheckWBTDisabled(t *testing.T) { + t.Parallel() + metaServer, err := makeMetaServer() + assert.NoError(t, err) + + result, _ := checkWBTDisabled(1.0, sysDiskPrefix, metrics.DummyMetrics{}, metaServer) + assert.False(t, result) + + result, _ = checkWBTDisabled(1.0, "notExist", metrics.DummyMetrics{}, metaServer) + assert.False(t, result) +} diff --git a/pkg/agent/qrm-plugins/io/handlers/ioweight/ioweight_linux.go b/pkg/agent/qrm-plugins/io/handlers/ioweight/ioweight_linux.go index 7ba32e6f3..4de546034 100644 --- a/pkg/agent/qrm-plugins/io/handlers/ioweight/ioweight_linux.go +++ b/pkg/agent/qrm-plugins/io/handlers/ioweight/ioweight_linux.go @@ -21,8 +21,11 @@ package ioweight import ( "context" + "fmt" "strconv" + v1 "k8s.io/api/core/v1" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/commonstate" coreconfig "github.com/kubewharf/katalyst-core/pkg/config" dynamicconfig "github.com/kubewharf/katalyst-core/pkg/config/agent/dynamic" @@ -64,6 +67,26 @@ func applyIOWeightCgroupLevelConfig(conf *coreconfig.Configuration, emitter metr } } +func applyPodIOWeight(pod *v1.Pod, defaultDevID, qosLevelDefaultValue string) error { + ioWeightValue, err := strconv.ParseInt(qosLevelDefaultValue, 10, 64) + if err != nil { + return fmt.Errorf("strconv.ParseInt failed, string=%v, err=%v", qosLevelDefaultValue, err) + } + + podAbsCGPath, err := common.GetPodAbsCgroupPath(common.CgroupSubsysIO, string(pod.UID)) + if err != nil { + return fmt.Errorf("GetPodAbsCgroupPath for pod: %s/%s failed with error: %v", pod.Namespace, pod.Name, err) + } + + err = cgroupmgr.ApplyIOWeightWithAbsolutePath(podAbsCGPath, defaultDevID, uint64(ioWeightValue)) + if err != nil { + return fmt.Errorf("ApplyIOWeightWithAbsolutePath for pod: %s/%s failed with error: %v", pod.Namespace, pod.Name, err) + } + + general.Infof("ApplyIOWeightWithRelativePath for pod: %s/%s, weight: %d successfully", pod.Namespace, pod.Name, ioWeightValue) + return nil +} + func applyIOWeightQoSLevelConfig(conf *coreconfig.Configuration, emitter metrics.MetricEmitter, metaServer *metaserver.MetaServer, ) { @@ -102,6 +125,15 @@ func applyIOWeightQoSLevelConfig(conf *coreconfig.Configuration, if !ok { continue } + + // setup pod level. + err = applyPodIOWeight(pod, defaultDevID, qosLevelDefaultValue) + if err != nil { + general.Errorf("Failed to apply IO weight for pod %s/%s: %v", pod.Namespace, pod.Name, err) + continue + } + + // setup contaienr level. for _, containerStatus := range pod.Status.ContainerStatuses { podUID, containerID := string(pod.UID), native.TrimContainerIDPrefix(containerStatus.ContainerID) err := cgroupmgr.ApplyUnifiedDataForContainer(podUID, containerID, extraControlKnobConfigs[controlKnobKeyIOWeight].CgroupSubsysName, cgroupIOWeightName, qosLevelDefaultValue) diff --git a/pkg/agent/qrm-plugins/io/handlers/ioweight/ioweight_linux_test.go b/pkg/agent/qrm-plugins/io/handlers/ioweight/ioweight_linux_test.go index 318f236a3..b30c22155 100644 --- a/pkg/agent/qrm-plugins/io/handlers/ioweight/ioweight_linux_test.go +++ b/pkg/agent/qrm-plugins/io/handlers/ioweight/ioweight_linux_test.go @@ -366,3 +366,24 @@ func TestIOWeightTaskFunc(t *testing.T) { }, }, metrics.DummyMetrics{}, metaServerEmpty) } + +func TestApplyPodIOWeight(t *testing.T) { + t.Parallel() + + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + UID: "test-uid", + Namespace: "default", + Name: "test-pod", + }, + } + + defaultDevID := "test-dev-id" + qosLevelDefaultValue := "500" + + err := applyPodIOWeight(pod, defaultDevID, qosLevelDefaultValue) + assert.Error(t, err) + + err = applyPodIOWeight(pod, defaultDevID, "test") + assert.Error(t, err) +} diff --git a/pkg/config/agent/qrm/io_plugin.go b/pkg/config/agent/qrm/io_plugin.go index 589063675..1c25fc811 100644 --- a/pkg/config/agent/qrm/io_plugin.go +++ b/pkg/config/agent/qrm/io_plugin.go @@ -34,6 +34,7 @@ type WritebackThrottlingOption struct { type IOCostOption struct { EnableSettingIOCost bool + CheckWBTDisabled bool IOCostQoSConfigFile string IOCostModelConfigFile string }