Skip to content

Commit

Permalink
qemu: Support PCIe device hotplug for q35
Browse files Browse the repository at this point in the history
- add pcie-root-port device to qemu command line for q35
- hotplug a PCIe device into a PCIe Root Port

Fixes: kata-containers#2432

Signed-off-by: Jimmy Xu <junming.xjm@antfin.com>
  • Loading branch information
Jimmy-Xu committed Feb 6, 2020
1 parent fa7d00e commit bb41b72
Show file tree
Hide file tree
Showing 22 changed files with 281 additions and 21 deletions.
5 changes: 4 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,7 @@ DEFENABLEDEBUG := false
DEFDISABLENESTINGCHECKS := false
DEFMSIZE9P := 8192
DEFHOTPLUGVFIOONROOTBUS := false
DEFPCIEROOTPORT := 0

# Default cgroup model
DEFSANDBOXCGROUPONLY ?= false
Expand Down Expand Up @@ -444,6 +445,7 @@ USER_VARS += DEFENABLEDEBUG
USER_VARS += DEFDISABLENESTINGCHECKS
USER_VARS += DEFMSIZE9P
USER_VARS += DEFHOTPLUGVFIOONROOTBUS
USER_VARS += DEFPCIEROOTPORT
USER_VARS += DEFENTROPYSOURCE
USER_VARS += DEFSANDBOXCGROUPONLY
USER_VARS += BUILDFLAGS
Expand Down Expand Up @@ -619,7 +621,8 @@ $(GENERATED_FILES): %: %.in $(MAKEFILE_LIST) VERSION .git-commit
-e "s|@DEFENABLEDEBUG@|$(DEFENABLEDEBUG)|g" \
-e "s|@DEFDISABLENESTINGCHECKS@|$(DEFDISABLENESTINGCHECKS)|g" \
-e "s|@DEFMSIZE9P@|$(DEFMSIZE9P)|g" \
-e "s|@DEFHOTPLUGONROOTBUS@|$(DEFHOTPLUGVFIOONROOTBUS)|g" \
-e "s|@DEFHOTPLUGVFIOONROOTBUS@|$(DEFHOTPLUGVFIOONROOTBUS)|g" \
-e "s|@DEFPCIEROOTPORT@|$(DEFPCIEROOTPORT)|g" \
-e "s|@DEFENTROPYSOURCE@|$(DEFENTROPYSOURCE)|g" \
-e "s|@DEFSANDBOXCGROUPONLY@|$(DEFSANDBOXCGROUPONLY)|g" \
$< > $@
Expand Down
7 changes: 7 additions & 0 deletions cli/config/configuration-qemu.toml.in
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,13 @@ enable_iothreads = @DEFENABLEIOTHREADS@
# Default false
#hotplug_vfio_on_root_bus = true

# Before hot plugging a PCIe device, you need to add a pcie_root_port device.
# Use this parameter when using some large PCI bar devices, such as Nvidia GPU
# The value means the number of pcie_root_port
# This value is valid when hotplug_vfio_on_root_bus is true and machine_type is "q35"
# Default 0
#pcie_root_port = 2

# If vhost-net backend for virtio-net is not desired, set to true. Default is false, which trades off
# security (vhost-net runs ring0) for network I/O performance.
#disable_vhost_net = true
Expand Down
25 changes: 15 additions & 10 deletions cli/kata-env.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,16 +83,18 @@ type RuntimeVersionInfo struct {

// HypervisorInfo stores hypervisor details
type HypervisorInfo struct {
MachineType string
Version string
Path string
BlockDeviceDriver string
EntropySource string
Msize9p uint32
MemorySlots uint32
Debug bool
UseVSock bool
SharedFS string
MachineType string
Version string
Path string
BlockDeviceDriver string
EntropySource string
SharedFS string
Msize9p uint32
MemorySlots uint32
PCIeRootPort uint32
HotplugVFIOOnRootBus bool
Debug bool
UseVSock bool
}

// ProxyInfo stores proxy details
Expand Down Expand Up @@ -355,6 +357,9 @@ func getHypervisorInfo(config oci.RuntimeConfig) HypervisorInfo {
MemorySlots: config.HypervisorConfig.MemSlots,
EntropySource: config.HypervisorConfig.EntropySource,
SharedFS: config.HypervisorConfig.SharedFS,

HotplugVFIOOnRootBus: config.HypervisorConfig.HotplugVFIOOnRootBus,
PCIeRootPort: config.HypervisorConfig.PCIeRootPort,
}
}

Expand Down
5 changes: 5 additions & 0 deletions cli/kata-env_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ func makeRuntimeConfig(prefixDir string) (configFile string, config oci.RuntimeC
blockStorageDriver := "virtio-scsi"
enableIOThreads := true
hotplugVFIOOnRootBus := true
pcieRootPort := uint32(2)
disableNewNetNs := false
sharedFS := "virtio-9p"

Expand Down Expand Up @@ -150,6 +151,7 @@ func makeRuntimeConfig(prefixDir string) (configFile string, config oci.RuntimeC
BlockDeviceDriver: blockStorageDriver,
EnableIOThreads: enableIOThreads,
HotplugVFIOOnRootBus: hotplugVFIOOnRootBus,
PCIeRootPort: pcieRootPort,
DisableNewNetNs: disableNewNetNs,
DefaultVCPUCount: hypConfig.NumVCPUs,
DefaultMaxVCPUCount: hypConfig.DefaultMaxVCPUs,
Expand Down Expand Up @@ -329,6 +331,9 @@ func getExpectedHypervisor(config oci.RuntimeConfig) HypervisorInfo {
Debug: config.HypervisorConfig.Debug,
EntropySource: config.HypervisorConfig.EntropySource,
SharedFS: config.HypervisorConfig.SharedFS,

HotplugVFIOOnRootBus: config.HypervisorConfig.HotplugVFIOOnRootBus,
PCIeRootPort: config.HypervisorConfig.PCIeRootPort,
}
}

Expand Down
2 changes: 2 additions & 0 deletions containerd-shim-v2/create_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config string, err err
blockDeviceDriver := "virtio-scsi"
enableIOThreads := true
hotplugVFIOOnRootBus := true
pcieRootPort := uint32(2)
disableNewNetNs := false
sharedFS := "virtio-9p"

Expand All @@ -416,6 +417,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config string, err err
BlockDeviceDriver: blockDeviceDriver,
EnableIOThreads: enableIOThreads,
HotplugVFIOOnRootBus: hotplugVFIOOnRootBus,
PCIeRootPort: pcieRootPort,
DisableNewNetNs: disableNewNetNs,
SharedFS: sharedFS,
}
Expand Down
2 changes: 2 additions & 0 deletions pkg/katatestutils/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ type RuntimeConfigOptions struct {
AgentTraceMode string
AgentTraceType string
SharedFS string
PCIeRootPort uint32
DisableBlock bool
EnableIOThreads bool
HotplugVFIOOnRootBus bool
Expand Down Expand Up @@ -59,6 +60,7 @@ func MakeRuntimeConfigFileData(config RuntimeConfigOptions) string {
disable_block_device_use = ` + strconv.FormatBool(config.DisableBlock) + `
enable_iothreads = ` + strconv.FormatBool(config.EnableIOThreads) + `
hotplug_vfio_on_root_bus = ` + strconv.FormatBool(config.HotplugVFIOOnRootBus) + `
pcie_root_port = ` + strconv.FormatUint(uint64(config.PCIeRootPort), 10) + `
msize_9p = ` + strconv.FormatUint(uint64(config.DefaultMsize9p), 10) + `
enable_debug = ` + strconv.FormatBool(config.HypervisorDebug) + `
guest_hook_path = "` + config.DefaultGuestHookPath + `"
Expand Down
1 change: 1 addition & 0 deletions pkg/katautils/config-settings.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ const defaultEnableDebug bool = false
const defaultDisableNestingChecks bool = false
const defaultMsize9p uint32 = 8192
const defaultHotplugVFIOOnRootBus bool = false
const defaultPCIeRootPort = 0
const defaultEntropySource = "/dev/urandom"
const defaultGuestHookPath string = ""
const defaultVirtioFSCacheMode = "none"
Expand Down
4 changes: 4 additions & 0 deletions pkg/katautils/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ type hypervisor struct {
MemOffset uint32 `toml:"memory_offset"`
DefaultBridges uint32 `toml:"default_bridges"`
Msize9p uint32 `toml:"msize_9p"`
PCIeRootPort uint32 `toml:"pcie_root_port"`
DisableBlockDeviceUse bool `toml:"disable_block_device_use"`
MemPrealloc bool `toml:"enable_mem_prealloc"`
HugePages bool `toml:"enable_hugepages"`
Expand Down Expand Up @@ -648,6 +649,7 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
UseVSock: useVSock,
DisableImageNvdimm: h.DisableImageNvdimm,
HotplugVFIOOnRootBus: h.HotplugVFIOOnRootBus,
PCIeRootPort: h.PCIeRootPort,
DisableVhostNet: h.DisableVhostNet,
GuestHookPath: h.guestHookPath(),
}, nil
Expand Down Expand Up @@ -796,6 +798,7 @@ func newClhHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
EnableIOThreads: h.EnableIOThreads,
Msize9p: h.msize9p(),
HotplugVFIOOnRootBus: h.HotplugVFIOOnRootBus,
PCIeRootPort: h.PCIeRootPort,
DisableVhostNet: true,
UseVSock: true,
}, nil
Expand Down Expand Up @@ -1073,6 +1076,7 @@ func GetDefaultHypervisorConfig() vc.HypervisorConfig {
EnableIOThreads: defaultEnableIOThreads,
Msize9p: defaultMsize9p,
HotplugVFIOOnRootBus: defaultHotplugVFIOOnRootBus,
PCIeRootPort: defaultPCIeRootPort,
GuestHookPath: defaultGuestHookPath,
VirtioFSCache: defaultVirtioFSCacheMode,
DisableImageNvdimm: defaultDisableImageNvdimm,
Expand Down
11 changes: 11 additions & 0 deletions pkg/katautils/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf
blockDeviceDriver := "virtio-scsi"
enableIOThreads := true
hotplugVFIOOnRootBus := true
pcieRootPort := uint32(2)
disableNewNetNs := false
sharedFS := "virtio-9p"

Expand All @@ -101,6 +102,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf
BlockDeviceDriver: blockDeviceDriver,
EnableIOThreads: enableIOThreads,
HotplugVFIOOnRootBus: hotplugVFIOOnRootBus,
PCIeRootPort: pcieRootPort,
DisableNewNetNs: disableNewNetNs,
DefaultVCPUCount: defaultVCPUCount,
DefaultMaxVCPUCount: defaultMaxVCPUCount,
Expand Down Expand Up @@ -158,6 +160,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf
Mlock: !defaultEnableSwap,
EnableIOThreads: enableIOThreads,
HotplugVFIOOnRootBus: hotplugVFIOOnRootBus,
PCIeRootPort: pcieRootPort,
Msize9p: defaultMsize9p,
MemSlots: defaultMemSlots,
EntropySource: defaultEntropySource,
Expand Down Expand Up @@ -775,6 +778,7 @@ func TestNewQemuHypervisorConfig(t *testing.T) {
disableBlock := true
enableIOThreads := true
hotplugVFIOOnRootBus := true
pcieRootPort := uint32(2)
orgVHostVSockDevicePath := utils.VHostVSockDevicePath
defer func() {
utils.VHostVSockDevicePath = orgVHostVSockDevicePath
Expand All @@ -789,6 +793,7 @@ func TestNewQemuHypervisorConfig(t *testing.T) {
DisableBlockDeviceUse: disableBlock,
EnableIOThreads: enableIOThreads,
HotplugVFIOOnRootBus: hotplugVFIOOnRootBus,
PCIeRootPort: pcieRootPort,
UseVSock: true,
}

Expand Down Expand Up @@ -846,6 +851,10 @@ func TestNewQemuHypervisorConfig(t *testing.T) {
if config.HotplugVFIOOnRootBus != hotplugVFIOOnRootBus {
t.Errorf("Expected value for HotplugVFIOOnRootBus %v, got %v", hotplugVFIOOnRootBus, config.HotplugVFIOOnRootBus)
}

if config.PCIeRootPort != pcieRootPort {
t.Errorf("Expected value for PCIeRootPort %v, got %v", pcieRootPort, config.PCIeRootPort)
}
}

func TestNewQemuHypervisorConfigImageAndInitrd(t *testing.T) {
Expand All @@ -869,6 +878,7 @@ func TestNewQemuHypervisorConfigImageAndInitrd(t *testing.T) {
disableBlock := true
enableIOThreads := true
hotplugVFIOOnRootBus := true
pcieRootPort := uint32(2)

hypervisor := hypervisor{
Path: hypervisorPath,
Expand All @@ -879,6 +889,7 @@ func TestNewQemuHypervisorConfigImageAndInitrd(t *testing.T) {
DisableBlockDeviceUse: disableBlock,
EnableIOThreads: enableIOThreads,
HotplugVFIOOnRootBus: hotplugVFIOOnRootBus,
PCIeRootPort: pcieRootPort,
}

_, err = newQemuHypervisorConfig(hypervisor)
Expand Down
19 changes: 17 additions & 2 deletions virtcontainers/device/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,12 @@ var SysDevPrefix = "/sys/dev"
// SysIOMMUPath is static string of /sys/kernel/iommu_groups
var SysIOMMUPath = "/sys/kernel/iommu_groups"

// SysBusPciDevicesPath is static string of /sys/bus/pci/devices
var SysBusPciDevicesPath = "/sys/bus/pci/devices"

// SysBusPciSlotsPath is static string of /sys/bus/pci/slots
var SysBusPciSlotsPath = "/sys/bus/pci/slots"

// DeviceInfo is an embedded type that contains device data common to all types of devices.
type DeviceInfo struct {
// Hostpath is device path on host
Expand Down Expand Up @@ -165,12 +171,15 @@ const (

// VFIODev represents a VFIO drive used for hotplugging
type VFIODev struct {
// ID is used to identify this drive in the hypervisor options.
ID string
// IsPCIe specifies device is PCIe or PCI
IsPCIe bool

// Type of VFIO device
Type VFIODeviceType

// ID is used to identify this drive in the hypervisor options.
ID string

// BDF (Bus:Device.Function) of the PCI address
BDF string

Expand All @@ -182,6 +191,12 @@ type VFIODev struct {

// DeviceID specifies device id
DeviceID string

// PCI Class Code
Class string

// Bus of VFIO PCIe device
Bus string
}

// RNGDev represents a random number generator device
Expand Down
93 changes: 91 additions & 2 deletions virtcontainers/device/drivers/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,102 @@
package drivers

import (
"github.com/sirupsen/logrus"
"fmt"
"io/ioutil"
"path/filepath"
"strings"

"github.com/kata-containers/runtime/virtcontainers/device/api"
"github.com/kata-containers/runtime/virtcontainers/device/config"
"github.com/sirupsen/logrus"
)

const (
intMax = ^uint(0)

PCIDomain = "0000"
PCIeKeyword = "PCIe"
)

const intMax uint = ^uint(0)
type PCISysFsType string

var (
PCISysFsDevices PCISysFsType = "devices" // /sys/bus/pci/devices
PCISysFsSlots PCISysFsType = "slots" // /sys/bus/pci/slots
)

type PCISysFsProperty string

var (
PCISysFsDevicesClass PCISysFsProperty = "class" // /sys/bus/pci/devices/xxx/class
PCISysFsSlotsAddress PCISysFsProperty = "address" // /sys/bus/pci/slots/xxx/address
PCISysFsSlotsMaxBusSpeed PCISysFsProperty = "max_bus_speed" // /sys/bus/pci/slots/xxx/max_bus_speed
)

func deviceLogger() *logrus.Entry {
return api.DeviceLogger()
}

/*
Identify PCIe device by /sys/bus/pci/slots/xx/max_bus_speed, sample content "8.0 GT/s PCIe"
The /sys/bus/pci/slots/xx/address contains bdf, sample content "0000:04:00"
bdf format: bus:slot.function
*/
func isPCIeDevice(bdf string) bool {
if len(strings.Split(bdf, ":")) == 2 {
bdf = PCIDomain + ":" + bdf
}
slots, err := ioutil.ReadDir(config.SysBusPciSlotsPath)
if err != nil {
deviceLogger().WithError(err).WithField("path", config.SysBusPciSlotsPath).Warn("failed to list pci slots")
return false
}
b := strings.Split(bdf, ".")[0]
for _, slot := range slots {
address := getPCISlotProperty(slot.Name(), PCISysFsSlotsAddress)
if b == address {
maxBusSpeed := getPCISlotProperty(slot.Name(), PCISysFsSlotsMaxBusSpeed)
if strings.Contains(maxBusSpeed, PCIeKeyword) {
return true
}
}
}
deviceLogger().WithField("dev-bdf", bdf).Debug("can not find slot for bdf of pci device")
return false
}

// read from /sys/bus/pci/devices/xxx/property
func getPCIDeviceProperty(bdf string, property PCISysFsProperty) string {
if len(strings.Split(bdf, ":")) == 2 {
bdf = PCIDomain + ":" + bdf
}
propertyPath := filepath.Join(config.SysBusPciDevicesPath, bdf, string(property))
rlt, err := readPCIProperty(propertyPath)
if err != nil {
deviceLogger().WithError(err).WithField("path", propertyPath).Warn("failed to read pci device property")
return ""
}
return rlt
}

// read from /sys/bus/pci/slots/xxx/property
func getPCISlotProperty(slot string, property PCISysFsProperty) string {
propertyPath := filepath.Join(config.SysBusPciSlotsPath, slot, string(property))
rlt, err := readPCIProperty(propertyPath)
if err != nil {
deviceLogger().WithError(err).WithField("path", propertyPath).Warn("failed to read pci slot property")
return ""
}
return rlt
}

func readPCIProperty(propertyPath string) (string, error) {
var (
buf []byte
err error
)
if buf, err = ioutil.ReadFile(propertyPath); err != nil {
return "", fmt.Errorf("failed to read pci sysfs %v, error:%v", propertyPath, err)
}
return strings.Split(string(buf), "\n")[0], nil
}
Loading

0 comments on commit bb41b72

Please sign in to comment.