diff --git a/agent.go b/agent.go index 7c9a758e47..5296f0ee99 100644 --- a/agent.go +++ b/agent.go @@ -181,7 +181,7 @@ var logsVSockPort = uint32(0) var debugConsoleVSockPort = uint32(0) // Timeout waiting for a device to be hotplugged -var hotplugTimeout = 3 * time.Second +var hotplugTimeout = 10 * time.Second // Specify the log level var logLevel = defaultLogLevel @@ -761,8 +761,7 @@ func (s *sandbox) listenToUdevEvents() { fieldLogger.Infof("Received add uevent") // Check if device hotplug event results in a device node being created. - if uEv.DevName != "" && - (strings.HasPrefix(uEv.DevPath, rootBusPath) || strings.HasPrefix(uEv.DevPath, acpiDevPath)) { + if strings.HasPrefix(uEv.DevPath, rootBusPath) || strings.HasPrefix(uEv.DevPath, acpiDevPath) { // Lock is needed to safely read and modify the sysToDevMap and deviceWatchers. // This makes sure that watchers do not access the map while it is being updated. s.Lock() diff --git a/agent_test.go b/agent_test.go index 0cd73763cd..9ae183b942 100644 --- a/agent_test.go +++ b/agent_test.go @@ -30,7 +30,6 @@ const ( testExecID = "testExecID" testContainerID = "testContainerID" testFileMode = os.FileMode(0640) - testDirMode = os.FileMode(0750) ) func createFileWithPerms(file, contents string, perms os.FileMode) error { diff --git a/device.go b/device.go index 478809c134..a94d9d640f 100644 --- a/device.go +++ b/device.go @@ -34,15 +34,15 @@ const ( driverNvdimmType = "nvdimm" driverEphemeralType = "ephemeral" driverLocalType = "local" - vmRootfs = "/" -) -const ( - pciBusMode = 0220 + //VFIO devices to be bound to the VM's native drivers (this is + //Kata specific behaviour, that requires careful configuration + //to get something usable inside the container) + driverVfioVmType = "vfio-vm" + vmRootfs = "/" ) var ( - pciBusRescanFile = sysfsDir + "/bus/pci/rescan" systemDevPath = "/dev" getSCSIDevPath = getSCSIDevPathImpl getPmemDevPath = getPmemDevPathImpl @@ -97,10 +97,7 @@ var deviceHandlerList = map[string]deviceHandler{ driverBlkCCWType: virtioBlkCCWDeviceHandler, driverSCSIType: virtioSCSIDeviceHandler, driverNvdimmType: nvdimmDeviceHandler, -} - -func rescanPciBus() error { - return ioutil.WriteFile(pciBusRescanFile, []byte{'1'}, pciBusMode) + driverVfioVmType: vfioDeviceHandler, } // pciPathToSysfs fetches the sysfs path for a PCI path, relative to @@ -146,6 +143,7 @@ func pciPathToSysfsImpl(pciPath PciPath) (string, error) { } func getDeviceName(s *sandbox, devID string) (string, error) { + var found bool var devName string var notifyChan chan string @@ -155,7 +153,7 @@ func getDeviceName(s *sandbox, devID string) (string, error) { s.Lock() for key, value := range s.sysToDevMap { if strings.Contains(key, devID) { - devName = value + devName, found = value, true fieldLogger.Infof("Device: %s found in device map", devID) break } @@ -166,13 +164,13 @@ func getDeviceName(s *sandbox, devID string) (string, error) { // The key of the watchers map is the device we are interested in. // Note this is done inside the lock, not to miss any events from the // global udev listener. - if devName == "" { + if !found { notifyChan = make(chan string, 1) s.deviceWatchers[devID] = notifyChan } s.Unlock() - if devName == "" { + if !found { fieldLogger.Infof("Waiting on channel for device: %s notification", devID) select { case devName = <-notifyChan: @@ -196,14 +194,6 @@ func getPCIDeviceNameImpl(s *sandbox, pciPath PciPath) (string, error) { return "", err } - fieldLogger := agentLog.WithField("sysfsRelPath", sysfsRelPath) - - // Rescan pci bus if we need to wait for a new pci device - if err = rescanPciBus(); err != nil { - fieldLogger.WithError(err).Error("Failed to scan pci bus") - return "", err - } - return getDeviceName(s, sysfsRelPath) } @@ -262,6 +252,46 @@ func nvdimmDeviceHandler(_ context.Context, device pb.Device, spec *pb.Spec, s * return updateSpecDeviceList(device, spec, devIdx) } +// device.Options should have one entry for each PCI device in the VFIO group +// Each option should have the form "DDDD:BB:DD.F=NN/MM" +// DDDD:BB:DD.F is the device's PCI address in the host +// NN is the PCI slot of the device's bridge, MM is the PCI slot of the device +func vfioDeviceHandler(ctx context.Context, device pb.Device, spec *pb.Spec, s *sandbox, devIdx devIndex) error { + fieldLogger := agentLog.WithField("host-dev", device.ContainerPath) + + for _, opt := range device.Options { + tokens := strings.Split(opt, "=") + if len(tokens) != 2 { + return fmt.Errorf("Malformed VFIO option %q", opt) + } + + hostBdf := tokens[0] + guestPCIPath := PciPath{tokens[1]} + + sysfsRelPath, err := pciPathToSysfs(guestPCIPath) + if err != nil { + return err + } + + // We don't actually care what the /dev node is called + // (and there may not be any), but this will wait for + // a uevent that indicates the device is ready + _, err = getDeviceName(s, sysfsRelPath) + if err != nil { + return err + } + + tokens = strings.Split(sysfsRelPath, "/") + guestBdf := tokens[len(tokens)-1] + + fieldLogger.WithField("host-bdf", hostBdf).WithField("guest-bdf", guestBdf).Debug("VFIO: PCI device ready") + } + + fieldLogger.Debug("VFIO: Group complete") + + return nil +} + // updateSpecDeviceList takes a device description provided by the caller, // trying to find it on the guest. Once this device has been identified, the // "real" information that can be read from inside the VM is used to update diff --git a/device_test.go b/device_test.go index 2259e57560..a7da68e5c9 100644 --- a/device_test.go +++ b/device_test.go @@ -731,45 +731,6 @@ func TestUpdateSpecDeviceListCharBlockConflict(t *testing.T) { assert.Equal(hostMinor, spec.Linux.Resources.Devices[1].Minor) } -func TestRescanPciBus(t *testing.T) { - skipUnlessRoot(t) - - assert := assert.New(t) - - err := rescanPciBus() - assert.Nil(err) - -} - -func TestRescanPciBusSubverted(t *testing.T) { - assert := assert.New(t) - - dir, err := ioutil.TempDir("", "") - assert.NoError(err) - defer os.RemoveAll(dir) - - rescanDir := filepath.Join(dir, "rescan-dir") - - err = os.MkdirAll(rescanDir, testDirMode) - assert.NoError(err) - - rescan := filepath.Join(rescanDir, "rescan") - - savedFile := pciBusRescanFile - defer func() { - pciBusRescanFile = savedFile - }() - - pciBusRescanFile = rescan - - err = rescanPciBus() - assert.NoError(err) - - os.RemoveAll(rescanDir) - err = rescanPciBus() - assert.Error(err) -} - func TestVirtioMmioBlkDeviceHandler(t *testing.T) { assert := assert.New(t) @@ -834,6 +795,47 @@ func TestNvdimmDeviceHandler(t *testing.T) { assert.Error(err) } +func TestVfioVmDeviceHandler(t *testing.T) { + assert := assert.New(t) + + hostBdf := "0000:0a:0b.0" + guestPciPath := "1c/1d" + guestBridgeBdf := "0000:00:1c.0" + guestBdf := "0000:0e:1d.0" + pciSysPath := filepath.Join(guestBridgeBdf, guestBdf) + guestSysPath := filepath.Join(sysfsDir, rootBusPath, pciSysPath) + + device := pb.Device{ + Type: driverVfioVmType, + Options: []string{fmt.Sprintf("%s=%s", hostBdf, guestPciPath)}, + } + spec := &pb.Spec{} + devIdx := makeDevIndex(spec) + sb := &sandbox{ + sysToDevMap: make(map[string]string), + } + + // Pre-populate the sysToDevMap, since the getDeviceName + // mechanics is not what we're testing + sb.sysToDevMap[guestSysPath] = "" + + ctx := context.Background() + + // Replace getDevicePCIAddress with a dummy, since that's not + // what we're testing here, and it would attempt to poke in + // sysfs + savedFunc := pciPathToSysfs + defer func() { + pciPathToSysfs = savedFunc + }() + pciPathToSysfs = func(pciPath PciPath) (string, error) { + return pciSysPath, nil + } + + err := vfioDeviceHandler(ctx, device, spec, sb, devIdx) + assert.Nil(err) +} + func TestGetPCIDeviceName(t *testing.T) { assert := assert.New(t) @@ -865,13 +867,6 @@ func TestGetPCIDeviceName(t *testing.T) { _, err = getPCIDeviceNameImpl(&sb, PciPath{""}) assert.Error(err) - - rescanDir := filepath.Dir(pciBusRescanFile) - err = os.MkdirAll(rescanDir, testDirMode) - assert.NoError(err) - - _, err = getPCIDeviceNameImpl(&sb, PciPath{""}) - assert.Error(err) } func TestGetSCSIDevPath(t *testing.T) { @@ -912,26 +907,23 @@ func TestCheckCCWBusFormat(t *testing.T) { } } -func TestGetDeviceName(t *testing.T) { +func oneGetDeviceNameTest(t *testing.T, sysName, devName, watchFor string) { assert := assert.New(t) - devName := "vda" - busID := "0.0.0005" - devPath := path.Join("/devices/css0/0.0.0004", busID, "virtio4/block", devName) systodevmap := make(map[string]string) - systodevmap[devPath] = devName + systodevmap[sysName] = devName sb := sandbox{ deviceWatchers: make(map[string](chan string)), sysToDevMap: systodevmap, } - name, err := getDeviceName(&sb, busID) + name, err := getDeviceName(&sb, watchFor) assert.Nil(err) assert.Equal(name, path.Join(devRootPath, devName)) - delete(sb.sysToDevMap, devPath) + delete(sb.sysToDevMap, sysName) go func() { for { @@ -941,7 +933,7 @@ func TestGetDeviceName(t *testing.T) { continue } - if strings.Contains(devPath, devAddress) && strings.HasSuffix(devAddress, blkCCWSuffix) { + if strings.Contains(watchFor, devAddress) { ch <- devName close(ch) delete(sb.deviceWatchers, devAddress) @@ -954,12 +946,27 @@ func TestGetDeviceName(t *testing.T) { sb.Unlock() }() - name, err = getDeviceName(&sb, path.Join(busID, blkCCWSuffix)) + name, err = getDeviceName(&sb, watchFor) assert.Nil(err) assert.Equal(name, path.Join(devRootPath, devName)) } +func TestGetDeviceName(t *testing.T) { + devName := "vda" + busID := "0.0.0005" + sysName := path.Join("/devices/css0/0.0.0004", busID, "virtio4/block", devName) + + oneGetDeviceNameTest(t, sysName, devName, busID) +} + +func TestGetDeviceNameEmptyDev(t *testing.T) { + busID := "0000:01:00.0" + sysName := path.Join("/devices/pci0000:00/0000:00:1c.0", busID) + + oneGetDeviceNameTest(t, sysName, "", busID) +} + func TestUpdateDeviceCgroupForGuestRootfs(t *testing.T) { skipUnlessRoot(t) assert := assert.New(t) diff --git a/grpc.go b/grpc.go index a9ae43f737..da6dc8409b 100644 --- a/grpc.go +++ b/grpc.go @@ -618,12 +618,6 @@ func (a *agentGRPC) CreateContainer(ctx context.Context, req *pb.CreateContainer return emptyResp, err } - // re-scan PCI bus - // looking for hidden devices - if err = rescanPciBus(); err != nil { - agentLog.WithError(err).Warn("Could not rescan PCI bus") - } - // Some devices need some extra processing (the ones invoked with // --device for instance), and that's what this call is doing. It // updates the devices listed in the OCI spec, so that they actually diff --git a/mount_test.go b/mount_test.go index b0f6825341..d7f4b08a9c 100644 --- a/mount_test.go +++ b/mount_test.go @@ -617,7 +617,8 @@ func TestStorageHandlers(t *testing.T) { } sb := sandbox{ - storages: make(map[string]*sandboxStorage), + storages: make(map[string]*sandboxStorage), + deviceWatchers: make(map[string](chan string)), } ctx, cancel := context.WithCancel(context.Background())