forked from openshift/machine-config-operator
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Use bfq scheduler on control plane, idle I/O for rpm-ostreed
Part of solving openshift#1897. A lot more details are in https://hackmd.io/WeqiDWMAQP2sNtuPRul9QA. The TL;DR is that the `bfq` I/O scheduler better respects I/O priorities, and also does a better job of handling latency-sensitive processes like `etcd` versus bulk/background I/O.
- Loading branch information
Showing
4 changed files
with
159 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
package daemon | ||
|
||
// This file provides changes that we make to the control plane | ||
// only. | ||
|
||
import ( | ||
"fmt" | ||
"io/ioutil" | ||
"os" | ||
"os/exec" | ||
"path/filepath" | ||
"strings" | ||
|
||
"github.com/golang/glog" | ||
"github.com/pkg/errors" | ||
) | ||
|
||
// setRootDeviceSchedulerBFQ switches to the `bfq` I/O scheduler | ||
// for the root block device to better share I/O between etcd | ||
// and other processes. See | ||
// https://github.com/openshift/machine-config-operator/issues/1897 | ||
// Note this is the current systemd default in Fedora, but not RHEL8, | ||
// except for NVMe devices. | ||
func setRootDeviceSchedulerBFQ() error { | ||
sched := "bfq" | ||
|
||
rootDevSysfs, err := getRootBlockDeviceSysfs() | ||
if err != nil { | ||
return err | ||
} | ||
|
||
schedulerPath := filepath.Join(rootDevSysfs, "/queue/scheduler") | ||
schedulerContentsBuf, err := ioutil.ReadFile(schedulerPath) | ||
if err != nil { | ||
return err | ||
} | ||
schedulerContents := string(schedulerContentsBuf) | ||
if strings.Contains(schedulerContents, fmt.Sprintf("[%s]", sched)) { | ||
glog.Infof("Device %s already uses scheduler %s", rootDevSysfs, sched) | ||
return nil | ||
} | ||
|
||
f, err := os.OpenFile(schedulerPath, os.O_WRONLY|os.O_TRUNC, 0644) | ||
if err != nil { | ||
return err | ||
} | ||
defer f.Close() | ||
_, err = f.Write([]byte(sched)) | ||
if err != nil { | ||
return err | ||
} | ||
glog.Infof("Set root blockdev %s to use scheduler %v", rootDevSysfs, sched) | ||
|
||
return nil | ||
} | ||
|
||
// updateOstreeObjectSync enables "per-object-fsync" which helps avoid | ||
// latency spikes for etcd; see https://github.com/ostreedev/ostree/pull/2152 | ||
func updateOstreeObjectSync() error { | ||
if err := exec.Command("ostree", "--repo=/sysroot/ostree/repo", "config", "set", "core.per-object-fsync", "true").Run(); err != nil { | ||
return errors.Wrapf(err, "Failed to set per-object-fsync for ostree") | ||
} | ||
return nil | ||
} | ||
|
||
// initializeControlPlane performs setup for the node that should | ||
// only occur on the control plane. Currently this switches the IO | ||
// scheduler and starts a goroutine acting as a small controller | ||
// for reflecting the etcd leader status in the node object to help | ||
// the MCC coordinate control plane updates. | ||
func (dn *Daemon) initializeControlPlane() error { | ||
if err := setRootDeviceSchedulerBFQ(); err != nil { | ||
return err | ||
} | ||
if err := updateOstreeObjectSync(); err != nil { | ||
return err | ||
} | ||
return nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
package daemon | ||
|
||
// This file provides routines that apply on Fedora CoreOS style systems, | ||
// including derivatives like RHEL CoreOS. | ||
|
||
import ( | ||
"fmt" | ||
"os" | ||
"path/filepath" | ||
|
||
"github.com/pkg/errors" | ||
) | ||
|
||
// byLabel builds the path of the udev generated symlink, under
// /dev/disk/by-label, for the block device carrying the given label.
// The label is inserted verbatim; nothing is checked on disk.
func byLabel(label string) string {
	const byLabelFormat = "/dev/disk/by-label/%s"
	return fmt.Sprintf(byLabelFormat, label)
}
|
||
// getParentDeviceSysfs returns e.g. /sys/devices/pci0000:00/0000:00:05.0/virtio2/block/vda from /dev/vda4, though | ||
// it can be more complex than that with e.g. NVMe. | ||
func getParentDeviceSysfs(device string) (string, error) { | ||
target, err := os.Readlink(device) | ||
if err != nil { | ||
return "", errors.Wrapf(err, "reading %s", device) | ||
} | ||
sysfsDevLink := fmt.Sprintf("/sys/class/block/%s", filepath.Base(target)) | ||
sysfsDev, err := filepath.EvalSymlinks(sysfsDevLink) | ||
if err != nil { | ||
return "", errors.Wrapf(err, "parsing %s", sysfsDevLink) | ||
} | ||
if _, err := os.Stat(filepath.Join(sysfsDev, "partition")); err == nil { | ||
sysfsDev = filepath.Dir(sysfsDev) | ||
} | ||
return sysfsDev, nil | ||
} | ||
|
||
// getRootBlockDeviceSysfs returns the path to the block | ||
// device backing the root partition on a FCOS system | ||
func getRootBlockDeviceSysfs() (string, error) { | ||
// Check for the `crypt_rootfs` label; this exists for RHCOS >= 4.3 but <= 4.6. | ||
// See https://github.com/openshift/enhancements/blob/master/enhancements/rhcos/automated-policy-based-disk-encryption.md | ||
luksRoot := byLabel("crypt_rootfs") | ||
if _, err := os.Stat(luksRoot); err == nil { | ||
return getParentDeviceSysfs(luksRoot) | ||
} | ||
// This is what we expect on FCOS and RHCOS <= 4.2 | ||
root := byLabel("root") | ||
if _, err := os.Stat(root); err == nil { | ||
return getParentDeviceSysfs(root) | ||
} | ||
return "", fmt.Errorf("Failed to find %s", root) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
8 changes: 8 additions & 0 deletions
8
templates/master/00-master/_base/units/rpm-ostreed.service.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# Drop-in for rpm-ostreed.service: sets Nice=10 and the idle I/O scheduling class for the service. | ||
name: rpm-ostreed.service | ||
dropins: | ||
- name: mco-controlplane-nice.conf | ||
contents: | | ||
# See https://github.com/openshift/machine-config-operator/issues/1897 | ||
[Service] | ||
Nice=10 | ||
IOSchedulingClass=idle |