diff --git a/internal/devices/drivers.go b/internal/devices/drivers.go index ecf2c0eaae..693959a5c3 100644 --- a/internal/devices/drivers.go +++ b/internal/devices/drivers.go @@ -13,6 +13,7 @@ import ( "github.com/Microsoft/hcsshim/internal/log" "github.com/Microsoft/hcsshim/internal/resources" "github.com/Microsoft/hcsshim/internal/uvm" + "github.com/Microsoft/hcsshim/internal/uvm/scsi" ) // InstallDriver mounts a share from the host into the UVM, installs any kernel drivers in the share, @@ -54,19 +55,18 @@ func InstallDrivers(ctx context.Context, vm *uvm.UtilityVM, share string, gpuDri } // first mount driver as scsi in standard mount location - uvmPathForShare := fmt.Sprintf(guestpath.LCOWGlobalMountPrefixFmt, vm.UVMMountCounter()) - mount, err := vm.AddSCSI(ctx, + mount, err := vm.SCSIManager.AddVirtualDisk( + ctx, share, - uvmPathForShare, true, - false, - []string{}, - uvm.VMAccessTypeIndividual) + scsi.IndividualVMAccess(vm.ID()), + &scsi.MountConfig{}, + ) if err != nil { return closer, fmt.Errorf("failed to add SCSI disk to utility VM for path %+v: %s", share, err) } closer = mount - uvmPathForShare = mount.UVMPath + uvmPathForShare := mount.GuestPath() // construct path that the drivers will be remounted as read/write in the UVM diff --git a/internal/hcsoci/resources_lcow.go b/internal/hcsoci/resources_lcow.go index 58bf9d1115..6561bcb6b7 100644 --- a/internal/hcsoci/resources_lcow.go +++ b/internal/hcsoci/resources_lcow.go @@ -20,7 +20,7 @@ import ( "github.com/Microsoft/hcsshim/internal/layers" "github.com/Microsoft/hcsshim/internal/log" "github.com/Microsoft/hcsshim/internal/resources" - "github.com/Microsoft/hcsshim/internal/uvm" + "github.com/Microsoft/hcsshim/internal/uvm/scsi" ) func allocateLinuxResources(ctx context.Context, coi *createOptionsInternal, r *resources.Resources, isSandbox bool) error { @@ -85,13 +85,18 @@ func allocateLinuxResources(ctx context.Context, coi *createOptionsInternal, r * l := log.G(ctx).WithField("mount", fmt.Sprintf("%+v", mount)) if mount.Type == "physical-disk" { l.Debug("hcsshim::allocateLinuxResources Hot-adding SCSI physical disk for OCI mount") - uvmPathForShare = fmt.Sprintf(guestpath.LCOWGlobalMountPrefixFmt, coi.HostingSystem.UVMMountCounter()) - scsiMount, err := coi.HostingSystem.AddSCSIPhysicalDisk(ctx, hostPath, uvmPathForShare, readOnly, mount.Options) + scsiMount, err := coi.HostingSystem.SCSIManager.AddPhysicalDisk( + ctx, + hostPath, + readOnly, + scsi.IndividualVMAccess(coi.HostingSystem.ID()), + &scsi.MountConfig{Options: mount.Options}, + ) if err != nil { return errors.Wrapf(err, "adding SCSI physical disk mount %+v", mount) } - uvmPathForFile = scsiMount.UVMPath + uvmPathForFile = scsiMount.GuestPath() r.Add(scsiMount) coi.Spec.Mounts[i].Type = "none" } else if mount.Type == "virtual-disk" { @@ -100,20 +105,18 @@ func allocateLinuxResources(ctx context.Context, coi *createOptionsInternal, r * // if the scsi device is already attached then we take the uvm path that the function below returns // that is where it was previously mounted in UVM - scsiMount, err := coi.HostingSystem.AddSCSI( + scsiMount, err := coi.HostingSystem.SCSIManager.AddVirtualDisk( ctx, hostPath, - uvmPathForShare, readOnly, - false, - mount.Options, - uvm.VMAccessTypeIndividual, + scsi.IndividualVMAccess(coi.HostingSystem.ID()), + &scsi.MountConfig{Options: mount.Options}, ) if err != nil { return errors.Wrapf(err, "adding SCSI virtual disk mount %+v", mount) } - uvmPathForFile = scsiMount.UVMPath + uvmPathForFile = scsiMount.GuestPath() 
r.Add(scsiMount) coi.Spec.Mounts[i].Type = "none" } else if strings.HasPrefix(mount.Source, guestpath.SandboxMountPrefix) { diff --git a/internal/hcsoci/resources_wcow.go b/internal/hcsoci/resources_wcow.go index 8a810a12da..0e19aa69c1 100644 --- a/internal/hcsoci/resources_wcow.go +++ b/internal/hcsoci/resources_wcow.go @@ -25,6 +25,7 @@ import ( "github.com/Microsoft/hcsshim/internal/resources" "github.com/Microsoft/hcsshim/internal/schemaversion" "github.com/Microsoft/hcsshim/internal/uvm" + "github.com/Microsoft/hcsshim/internal/uvm/scsi" "github.com/Microsoft/hcsshim/internal/wclayer" ) @@ -59,8 +60,7 @@ func allocateWindowsResources(ctx context.Context, coi *createOptionsInternal, r if coi.Spec.Root.Path == "" && (coi.HostingSystem != nil || coi.Spec.Windows.HyperV == nil) { log.G(ctx).Debug("hcsshim::allocateWindowsResources mounting storage") - containerRootInUVM := r.ContainerRootInUVM() - containerRootPath, closer, err := layers.MountWCOWLayers(ctx, coi.actualID, coi.Spec.Windows.LayerFolders, containerRootInUVM, "", coi.HostingSystem) + containerRootPath, closer, err := layers.MountWCOWLayers(ctx, coi.actualID, coi.Spec.Windows.LayerFolders, "", coi.HostingSystem) if err != nil { return errors.Wrap(err, "failed to mount container storage") } @@ -146,7 +146,6 @@ func setupMounts(ctx context.Context, coi *createOptionsInternal, r *resources.R } if coi.HostingSystem != nil && schemaversion.IsV21(coi.actualSchemaVersion) { - uvmPath := fmt.Sprintf(guestpath.WCOWGlobalMountPrefixFmt, coi.HostingSystem.UVMMountCounter()) readOnly := false for _, o := range mount.Options { if strings.ToLower(o) == "ro" { @@ -157,37 +156,35 @@ func setupMounts(ctx context.Context, coi *createOptionsInternal, r *resources.R l := log.G(ctx).WithField("mount", fmt.Sprintf("%+v", mount)) if mount.Type == "physical-disk" || mount.Type == "virtual-disk" || mount.Type == "extensible-virtual-disk" { var ( - scsiMount *uvm.SCSIMount + scsiMount *scsi.Mount err error ) switch mount.Type { case "physical-disk": l.Debug("hcsshim::allocateWindowsResources Hot-adding SCSI physical disk for OCI mount") - scsiMount, err = coi.HostingSystem.AddSCSIPhysicalDisk( + scsiMount, err = coi.HostingSystem.SCSIManager.AddPhysicalDisk( ctx, mount.Source, - uvmPath, readOnly, - mount.Options, + scsi.IndividualVMAccess(coi.HostingSystem.ID()), + &scsi.MountConfig{Options: mount.Options}, ) case "virtual-disk": l.Debug("hcsshim::allocateWindowsResources Hot-adding SCSI virtual disk for OCI mount") - scsiMount, err = coi.HostingSystem.AddSCSI( + scsiMount, err = coi.HostingSystem.SCSIManager.AddVirtualDisk( ctx, mount.Source, - uvmPath, readOnly, - false, - mount.Options, - uvm.VMAccessTypeIndividual, + scsi.IndividualVMAccess(coi.HostingSystem.ID()), + &scsi.MountConfig{Options: mount.Options}, ) case "extensible-virtual-disk": l.Debug("hcsshim::allocateWindowsResource Hot-adding ExtensibleVirtualDisk") - scsiMount, err = coi.HostingSystem.AddSCSIExtensibleVirtualDisk( + scsiMount, err = coi.HostingSystem.SCSIManager.AddExtensibleVirtualDisk( ctx, mount.Source, - uvmPath, readOnly, + &scsi.MountConfig{}, ) } if err != nil { @@ -197,7 +194,7 @@ func setupMounts(ctx context.Context, coi *createOptionsInternal, r *resources.R // Compute guest mounts now, and store them, so they can be added to the container doc later. // We do this now because we have access to the guest path through the returned mount object. 
coi.windowsAdditionalMounts = append(coi.windowsAdditionalMounts, hcsschema.MappedDirectory{ - HostPath: scsiMount.UVMPath, + HostPath: scsiMount.GuestPath(), ContainerPath: mount.Destination, ReadOnly: readOnly, }) diff --git a/internal/jobcontainers/storage.go b/internal/jobcontainers/storage.go index fedcee814e..180c27a862 100644 --- a/internal/jobcontainers/storage.go +++ b/internal/jobcontainers/storage.go @@ -54,7 +54,7 @@ func (c *JobContainer) mountLayers(ctx context.Context, containerID string, s *s if s.Root.Path == "" { log.G(ctx).Debug("mounting job container storage") var rootPath string - rootPath, closer, err = layers.MountWCOWLayers(ctx, containerID, s.Windows.LayerFolders, "", volumeMountPath, nil) + rootPath, closer, err = layers.MountWCOWLayers(ctx, containerID, s.Windows.LayerFolders, volumeMountPath, nil) if err != nil { return nil, fmt.Errorf("failed to mount job container storage: %w", err) } diff --git a/internal/layers/layers.go b/internal/layers/layers.go index fd7cfa5389..52b19ab38f 100644 --- a/internal/layers/layers.go +++ b/internal/layers/layers.go @@ -22,6 +22,7 @@ import ( "github.com/Microsoft/hcsshim/internal/ospath" "github.com/Microsoft/hcsshim/internal/resources" "github.com/Microsoft/hcsshim/internal/uvm" + "github.com/Microsoft/hcsshim/internal/uvm/scsi" "github.com/Microsoft/hcsshim/internal/wclayer" ) @@ -107,15 +108,12 @@ func MountLCOWLayers(ctx context.Context, containerID string, layerFolders []str } log.G(ctx).WithField("hostPath", hostPath).Debug("mounting scratch VHD") - var options []string - scsiMount, err := vm.AddSCSI( + scsiMount, err := vm.SCSIManager.AddVirtualDisk( ctx, hostPath, - containerScratchPathInUVM, false, - vm.ScratchEncryptionEnabled(), - options, - uvm.VMAccessTypeIndividual, + scsi.IndividualVMAccess(vm.ID()), + &scsi.MountConfig{Encrypted: vm.ScratchEncryptionEnabled()}, ) if err != nil { return "", "", nil, fmt.Errorf("failed to add SCSI scratch VHD: %s", err) @@ -124,7 +122,7 @@ func MountLCOWLayers(ctx context.Context, containerID string, layerFolders []str // handles the case where we want to share a scratch disk for multiple containers instead // of mounting a new one. Pass a unique value for `ScratchPath` to avoid container upper and // work directories colliding in the UVM. - containerScratchPathInUVM = ospath.Join("linux", scsiMount.UVMPath, "scratch", containerID) + containerScratchPathInUVM = ospath.Join("linux", scsiMount.GuestPath(), "scratch", containerID) defer func() { if err != nil { @@ -160,7 +158,7 @@ func MountLCOWLayers(ctx context.Context, containerID string, layerFolders []str // // Job container: Returns the mount path on the host as a volume guid, with the volume mounted on // the host at `volumeMountPath`. 
-func MountWCOWLayers(ctx context.Context, containerID string, layerFolders []string, guestRoot, volumeMountPath string, vm *uvm.UtilityVM) (_ string, _ resources.ResourceCloser, err error) { +func MountWCOWLayers(ctx context.Context, containerID string, layerFolders []string, volumeMountPath string, vm *uvm.UtilityVM) (_ string, _ resources.ResourceCloser, err error) { if vm.OS() != "windows" { return "", nil, errors.New("MountWCOWLayers should only be called for WCOW") } @@ -168,7 +166,7 @@ func MountWCOWLayers(ctx context.Context, containerID string, layerFolders []str if vm == nil { return mountWCOWHostLayers(ctx, layerFolders, volumeMountPath) } - return mountWCOWIsolatedLayers(ctx, containerID, layerFolders, guestRoot, volumeMountPath, vm) + return mountWCOWIsolatedLayers(ctx, containerID, layerFolders, volumeMountPath, vm) } type wcowHostLayersCloser struct { @@ -305,7 +303,7 @@ func (lc *wcowIsolatedLayersCloser) Release(ctx context.Context) (retErr error) return } -func mountWCOWIsolatedLayers(ctx context.Context, containerID string, layerFolders []string, guestRoot, volumeMountPath string, vm *uvm.UtilityVM) (_ string, _ resources.ResourceCloser, err error) { +func mountWCOWIsolatedLayers(ctx context.Context, containerID string, layerFolders []string, volumeMountPath string, vm *uvm.UtilityVM) (_ string, _ resources.ResourceCloser, err error) { log.G(ctx).WithField("os", vm.OS()).Debug("hcsshim::MountWCOWLayers V2 UVM") var ( @@ -334,27 +332,17 @@ func mountWCOWIsolatedLayers(ctx context.Context, containerID string, layerFolde layerClosers = append(layerClosers, mount) } - containerScratchPathInUVM := ospath.Join(vm.OS(), guestRoot) hostPath, err := getScratchVHDPath(layerFolders) if err != nil { return "", nil, fmt.Errorf("failed to get scratch VHD path in layer folders: %s", err) } log.G(ctx).WithField("hostPath", hostPath).Debug("mounting scratch VHD") - var options []string - scsiMount, err := vm.AddSCSI( - ctx, - hostPath, - containerScratchPathInUVM, - false, - vm.ScratchEncryptionEnabled(), - options, - uvm.VMAccessTypeIndividual, - ) + scsiMount, err := vm.SCSIManager.AddVirtualDisk(ctx, hostPath, false, scsi.IndividualVMAccess(vm.ID()), &scsi.MountConfig{}) if err != nil { return "", nil, fmt.Errorf("failed to add SCSI scratch VHD: %s", err) } - containerScratchPathInUVM = scsiMount.UVMPath + containerScratchPathInUVM := scsiMount.GuestPath() defer func() { if err != nil { @@ -402,9 +390,7 @@ func addLCOWLayer(ctx context.Context, vm *uvm.UtilityVM, layerPath string) (uvm } } - options := []string{"ro"} - uvmPath = fmt.Sprintf(guestpath.LCOWGlobalMountPrefixFmt, vm.UVMMountCounter()) - sm, err := vm.AddSCSI(ctx, layerPath, uvmPath, true, false, options, uvm.VMAccessTypeNoop) + sm, err := vm.SCSIManager.AddVirtualDisk(ctx, layerPath, true, nil, &scsi.MountConfig{Options: []string{"ro"}}) if err != nil { return "", nil, fmt.Errorf("failed to add SCSI layer: %s", err) } @@ -412,7 +398,7 @@ func addLCOWLayer(ctx context.Context, vm *uvm.UtilityVM, layerPath string) (uvm "layerPath": layerPath, "layerType": "scsi", }).Debug("Added LCOW layer") - return sm.UVMPath, sm, nil + return sm.GuestPath(), sm, nil } // GetHCSLayers converts host paths corresponding to container layers into HCS schema V2 layers diff --git a/internal/lcow/disk.go b/internal/lcow/disk.go index 937b8deafa..74e96e3af8 100644 --- a/internal/lcow/disk.go +++ b/internal/lcow/disk.go @@ -9,6 +9,7 @@ import ( "github.com/Microsoft/hcsshim/internal/log" "github.com/Microsoft/hcsshim/internal/uvm" + 
"github.com/Microsoft/hcsshim/internal/uvm/scsi" "github.com/sirupsen/logrus" ) @@ -30,8 +31,8 @@ func FormatDisk(ctx context.Context, lcowUVM *uvm.UtilityVM, destPath string) er "dest": destPath, }).Debug("lcow::FormatDisk opts") - var options []string - scsi, err := lcowUVM.AddSCSIPhysicalDisk(ctx, destPath, "", false, options) // No destination as not formatted + // Attach without mounting. + scsi, err := lcowUVM.SCSIManager.AddPhysicalDisk(ctx, destPath, false, scsi.IndividualVMAccess(lcowUVM.ID()), nil) if err != nil { return err } @@ -46,7 +47,7 @@ func FormatDisk(ctx context.Context, lcowUVM *uvm.UtilityVM, destPath string) er "lun": scsi.LUN, }).Debug("lcow::FormatDisk device attached") - if err := formatDiskUvm(ctx, lcowUVM, scsi.Controller, scsi.LUN, destPath); err != nil { + if err := formatDiskUvm(ctx, lcowUVM, int(scsi.Controller()), int32(scsi.LUN()), destPath); err != nil { return err } log.G(ctx).WithField("dest", destPath).Debug("lcow::FormatDisk complete") diff --git a/internal/lcow/scratch.go b/internal/lcow/scratch.go index c86d141adf..2e3f82cd92 100644 --- a/internal/lcow/scratch.go +++ b/internal/lcow/scratch.go @@ -16,6 +16,7 @@ import ( "github.com/Microsoft/hcsshim/internal/log" "github.com/Microsoft/hcsshim/internal/timeout" "github.com/Microsoft/hcsshim/internal/uvm" + "github.com/Microsoft/hcsshim/internal/uvm/scsi" "github.com/sirupsen/logrus" ) @@ -68,15 +69,12 @@ func CreateScratch(ctx context.Context, lcowUVM *uvm.UtilityVM, destFile string, return fmt.Errorf("failed to create VHDx %s: %s", destFile, err) } - var options []string - scsi, err := lcowUVM.AddSCSI( + scsi, err := lcowUVM.SCSIManager.AddVirtualDisk( ctx, destFile, - "", // No destination as not formatted false, - lcowUVM.ScratchEncryptionEnabled(), - options, - uvm.VMAccessTypeIndividual, + scsi.IndividualVMAccess(lcowUVM.ID()), + nil, // Attach without mounting. 
) if err != nil { return err @@ -84,18 +82,18 @@ func CreateScratch(ctx context.Context, lcowUVM *uvm.UtilityVM, destFile string, removeSCSI := true defer func() { if removeSCSI { - _ = lcowUVM.RemoveSCSI(ctx, destFile) + scsi.Release(ctx) } }() log.G(ctx).WithFields(logrus.Fields{ "dest": destFile, - "controller": scsi.Controller, - "lun": scsi.LUN, + "controller": scsi.Controller(), + "lun": scsi.LUN(), }).Debug("lcow::CreateScratch device attached") // Validate /sys/bus/scsi/devices/C:0:0:L exists as a directory - devicePath := fmt.Sprintf("/sys/bus/scsi/devices/%d:0:0:%d/block", scsi.Controller, scsi.LUN) + devicePath := fmt.Sprintf("/sys/bus/scsi/devices/%d:0:0:%d/block", scsi.Controller(), scsi.LUN()) testdCtx, cancel := context.WithTimeout(ctx, timeout.TestDRetryLoop) defer cancel() for { @@ -138,7 +136,7 @@ func CreateScratch(ctx context.Context, lcowUVM *uvm.UtilityVM, destFile string, // Hot-Remove before we copy it removeSCSI = false - if err := lcowUVM.RemoveSCSI(ctx, destFile); err != nil { + if err := scsi.Release(ctx); err != nil { return fmt.Errorf("failed to hot-remove: %s", err) } diff --git a/internal/tools/uvmboot/mounts.go b/internal/tools/uvmboot/mounts.go index ade8cfc18a..b057599887 100644 --- a/internal/tools/uvmboot/mounts.go +++ b/internal/tools/uvmboot/mounts.go @@ -8,28 +8,31 @@ import ( "strings" "github.com/Microsoft/hcsshim/internal/uvm" + "github.com/Microsoft/hcsshim/internal/uvm/scsi" "github.com/sirupsen/logrus" "github.com/urfave/cli" ) func mountSCSI(ctx context.Context, c *cli.Context, vm *uvm.UtilityVM) error { for _, m := range parseMounts(c, scsiMountsArgName) { - if _, err := vm.AddSCSI( + if m.guest != "" { + return fmt.Errorf("scsi mount %s: guest path must be empty", m.host) + } + scsi, err := vm.SCSIManager.AddVirtualDisk( ctx, m.host, - m.guest, !m.writable, - false, // encrypted - []string{}, - uvm.VMAccessTypeIndividual, - ); err != nil { + scsi.IndividualVMAccess(vm.ID()), + &scsi.MountConfig{}, + ) + if err != nil { return fmt.Errorf("could not mount disk %s: %w", m.host, err) } else { logrus.WithFields(logrus.Fields{ "host": m.host, - "guest": m.guest, + "guest": scsi.GuestPath(), "writable": m.writable, - }).Debug("Mounted SCSI disk") + }).Info("Mounted SCSI disk") } } diff --git a/internal/uvm/create_lcow.go b/internal/uvm/create_lcow.go index ebc0aff79e..1af7761018 100644 --- a/internal/uvm/create_lcow.go +++ b/internal/uvm/create_lcow.go @@ -15,6 +15,7 @@ import ( "github.com/Microsoft/go-winio" "github.com/Microsoft/go-winio/pkg/guid" "github.com/Microsoft/hcsshim/internal/security" + "github.com/Microsoft/hcsshim/internal/uvm/scsi" "github.com/Microsoft/hcsshim/pkg/securitypolicy" "github.com/pkg/errors" "github.com/sirupsen/logrus" @@ -643,7 +644,7 @@ func makeLCOWDoc(ctx context.Context, opts *OptionsLCOW, uvm *UtilityVM) (_ *hcs Path: rootfsFullPath, ReadOnly: true, } - uvm.scsiLocations[0][0] = newSCSIMount(uvm, rootfsFullPath, "/", "VirtualDisk", "", 1, 0, 0, true, false) + uvm.reservedSCSISlots = append(uvm.reservedSCSISlots, scsi.Slot{Controller: 0, LUN: 0}) } } diff --git a/internal/uvm/create_wcow.go b/internal/uvm/create_wcow.go index a8ae3d651f..5b74a7e4d4 100644 --- a/internal/uvm/create_wcow.go +++ b/internal/uvm/create_wcow.go @@ -22,6 +22,7 @@ import ( "github.com/Microsoft/hcsshim/internal/protocol/guestrequest" "github.com/Microsoft/hcsshim/internal/schemaversion" "github.com/Microsoft/hcsshim/internal/security" + "github.com/Microsoft/hcsshim/internal/uvm/scsi" "github.com/Microsoft/hcsshim/internal/uvmfolder" 
"github.com/Microsoft/hcsshim/internal/wclayer" "github.com/Microsoft/hcsshim/internal/wcow" @@ -324,16 +325,7 @@ func CreateWCOW(ctx context.Context, opts *OptionsWCOW) (_ *UtilityVM, err error Type_: "VirtualDisk", } - uvm.scsiLocations[0][0] = newSCSIMount(uvm, - doc.VirtualMachine.Devices.Scsi[guestrequest.ScsiControllerGuids[0]].Attachments["0"].Path, - "", - doc.VirtualMachine.Devices.Scsi[guestrequest.ScsiControllerGuids[0]].Attachments["0"].Type_, - "", - 1, - 0, - 0, - false, - false) + uvm.reservedSCSISlots = append(uvm.reservedSCSISlots, scsi.Slot{Controller: 0, LUN: 0}) err = uvm.create(ctx, doc) if err != nil { diff --git a/internal/uvm/scsi.go b/internal/uvm/scsi.go deleted file mode 100644 index df6ebfc22d..0000000000 --- a/internal/uvm/scsi.go +++ /dev/null @@ -1,551 +0,0 @@ -//go:build windows - -package uvm - -import ( - "context" - "fmt" - "strings" - - "github.com/pkg/errors" - "github.com/sirupsen/logrus" - - "github.com/Microsoft/hcsshim/internal/hcs/resourcepaths" - hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" - "github.com/Microsoft/hcsshim/internal/log" - "github.com/Microsoft/hcsshim/internal/protocol/guestrequest" - "github.com/Microsoft/hcsshim/internal/protocol/guestresource" - "github.com/Microsoft/hcsshim/internal/security" - "github.com/Microsoft/hcsshim/internal/wclayer" -) - -// VMAccessType is used to determine the various types of access we can -// grant for a given file. -type VMAccessType int - -const ( - // `VMAccessTypeNoop` indicates no additional access should be given. Note - // this should be used for layers and gpu vhd where we have given VM group - // access outside of the shim (containerd for layers, package installation - // for gpu vhd). - VMAccessTypeNoop VMAccessType = iota - // `VMAccessTypeGroup` indicates we should give access to a file for the VM group sid - VMAccessTypeGroup - // `VMAccessTypeIndividual` indicates we should give additional access to a file for - // the running VM only - VMAccessTypeIndividual -) - -var ( - ErrNoAvailableLocation = fmt.Errorf("no available location") - ErrNotAttached = fmt.Errorf("not attached") - ErrAlreadyAttached = fmt.Errorf("already attached") - ErrNoSCSIControllers = fmt.Errorf("no SCSI controllers configured for this utility VM") - ErrTooManyAttachments = fmt.Errorf("too many SCSI attachments") - ErrSCSILayerWCOWUnsupported = fmt.Errorf("SCSI attached layers are not supported for WCOW") -) - -// Release frees the resources of the corresponding Scsi Mount -func (sm *SCSIMount) Release(ctx context.Context) error { - if err := sm.vm.RemoveSCSI(ctx, sm.HostPath); err != nil { - return fmt.Errorf("failed to remove SCSI device: %s", err) - } - return nil -} - -// SCSIMount struct representing a SCSI mount point and the UVM -// it belongs to. -type SCSIMount struct { - // Utility VM the scsi mount belongs to - vm *UtilityVM - // path is the host path to the vhd that is mounted. - HostPath string - // path for the uvm - UVMPath string - // scsi controller - Controller int - // scsi logical unit number - LUN int32 - // While most VHDs attached to SCSI are scratch spaces, in the case of LCOW - // when the size is over the size possible to attach to PMEM, we use SCSI for - // read-only layers. As RO layers are shared, we perform ref-counting. - isLayer bool - refCount uint32 - // specifies if this is an encrypted VHD - encrypted bool - // specifies if this is a readonly layer - readOnly bool - // "VirtualDisk" or "PassThru" or "ExtensibleVirtualDisk" disk attachment type. 
- attachmentType string - // If attachmentType is "ExtensibleVirtualDisk" then extensibleVirtualDiskType should - // specify the type of it (for e.g "space" for storage spaces). Otherwise this should be - // empty. - extensibleVirtualDiskType string - - // A channel to wait on while mount of this SCSI disk is in progress. - waitCh chan struct{} - // The error field that is set if the mounting of this disk fails. Any other waiters on waitCh - // can use this waitErr after the channel is closed. - waitErr error -} - -// addSCSIRequest is an internal struct used to hold all the parameters that are sent to -// the addSCSIActual method. -type addSCSIRequest struct { - // host path to the disk that should be added as a SCSI disk. - hostPath string - // the path inside the uvm at which this disk should show up. Can be empty. - uvmPath string - // attachmentType is required and `must` be `VirtualDisk` for vhd/vhdx - // attachments, `PassThru` for physical disk and `ExtensibleVirtualDisk` for - // Extensible virtual disks. - attachmentType string - // indicates if the VHD is encrypted - encrypted bool - // indicates if the attachment should be added read only. - readOnly bool - // guestOptions is a slice that contains optional information to pass to the guest - // service. - guestOptions []string - // indicates what access to grant the vm for the hostpath. Only required for - // `VirtualDisk` and `PassThru` disk types. - vmAccess VMAccessType - // `evdType` indicates the type of the extensible virtual disk if `attachmentType` - // is "ExtensibleVirtualDisk" should be empty otherwise. - evdType string -} - -// RefCount returns the current refcount for the SCSI mount. -func (sm *SCSIMount) RefCount() uint32 { - return sm.refCount -} - -func (sm *SCSIMount) logFormat() logrus.Fields { - return logrus.Fields{ - "HostPath": sm.HostPath, - "UVMPath": sm.UVMPath, - "isLayer": sm.isLayer, - "refCount": sm.refCount, - "Controller": sm.Controller, - "LUN": sm.LUN, - "ExtensibleVirtualDiskType": sm.extensibleVirtualDiskType, - } -} - -func newSCSIMount( - uvm *UtilityVM, - hostPath string, - uvmPath string, - attachmentType string, - evdType string, - refCount uint32, - controller int, - lun int32, - readOnly bool, - encrypted bool, -) *SCSIMount { - return &SCSIMount{ - vm: uvm, - HostPath: hostPath, - UVMPath: uvmPath, - refCount: refCount, - Controller: controller, - LUN: int32(lun), - encrypted: encrypted, - readOnly: readOnly, - attachmentType: attachmentType, - extensibleVirtualDiskType: evdType, - waitCh: make(chan struct{}), - } -} - -// allocateSCSISlot finds the next available slot on the -// SCSI controllers associated with a utility VM to use. -// Lock must be held when calling this function -func (uvm *UtilityVM) allocateSCSISlot(ctx context.Context) (int, int, error) { - for controller := 0; controller < int(uvm.scsiControllerCount); controller++ { - for lun, sm := range uvm.scsiLocations[controller] { - // If sm is nil, we have found an open slot so we allocate a new SCSIMount - if sm == nil { - return controller, lun, nil - } - } - } - return -1, -1, ErrNoAvailableLocation -} - -func (uvm *UtilityVM) deallocateSCSIMount(ctx context.Context, sm *SCSIMount) { - uvm.m.Lock() - defer uvm.m.Unlock() - if sm != nil { - log.G(ctx).WithFields(sm.logFormat()).Debug("removed SCSI location") - uvm.scsiLocations[sm.Controller][sm.LUN] = nil - } -} - -// Lock must be held when calling this function. 
-func (uvm *UtilityVM) findSCSIAttachment(ctx context.Context, findThisHostPath string) (*SCSIMount, error) { - for _, luns := range uvm.scsiLocations { - for _, sm := range luns { - if sm != nil && sm.HostPath == findThisHostPath { - log.G(ctx).WithFields(sm.logFormat()).Debug("found SCSI location") - return sm, nil - } - } - } - return nil, ErrNotAttached -} - -// RemoveSCSI removes a SCSI disk from a utility VM. -func (uvm *UtilityVM) RemoveSCSI(ctx context.Context, hostPath string) error { - uvm.m.Lock() - defer uvm.m.Unlock() - - if uvm.scsiControllerCount == 0 { - return ErrNoSCSIControllers - } - - // Make sure it is actually attached - sm, err := uvm.findSCSIAttachment(ctx, hostPath) - if err != nil { - return err - } - - sm.refCount-- - if sm.refCount > 0 { - return nil - } - - scsiModification := &hcsschema.ModifySettingRequest{ - RequestType: guestrequest.RequestTypeRemove, - ResourcePath: fmt.Sprintf(resourcepaths.SCSIResourceFormat, guestrequest.ScsiControllerGuids[sm.Controller], sm.LUN), - } - - var verity *guestresource.DeviceVerityInfo - if v, iErr := readVeritySuperBlock(ctx, hostPath); iErr != nil { - log.G(ctx).WithError(iErr).WithField("hostPath", sm.HostPath).Debug("unable to read dm-verity information from VHD") - } else { - if v != nil { - log.G(ctx).WithFields(logrus.Fields{ - "hostPath": hostPath, - "rootDigest": v.RootDigest, - }).Debug("removing SCSI with dm-verity") - } - verity = v - } - - // Include the GuestRequest so that the GCS ejects the disk cleanly if the - // disk was attached/mounted - // - // Note: We always send a guest eject even if there is no UVM path in lcow - // so that we synchronize the guest state. This seems to always avoid SCSI - // related errors if this index quickly reused by another container. - if uvm.operatingSystem == "windows" && sm.UVMPath != "" { - scsiModification.GuestRequest = guestrequest.ModificationRequest{ - ResourceType: guestresource.ResourceTypeMappedVirtualDisk, - RequestType: guestrequest.RequestTypeRemove, - Settings: guestresource.WCOWMappedVirtualDisk{ - ContainerPath: sm.UVMPath, - Lun: sm.LUN, - }, - } - } else { - scsiModification.GuestRequest = guestrequest.ModificationRequest{ - ResourceType: guestresource.ResourceTypeMappedVirtualDisk, - RequestType: guestrequest.RequestTypeRemove, - Settings: guestresource.LCOWMappedVirtualDisk{ - MountPath: sm.UVMPath, // May be blank in attach-only - Lun: uint8(sm.LUN), - Controller: uint8(sm.Controller), - VerityInfo: verity, - }, - } - } - - if err := uvm.modify(ctx, scsiModification); err != nil { - return fmt.Errorf("failed to remove SCSI disk %s from container %s: %s", hostPath, uvm.id, err) - } - log.G(ctx).WithFields(sm.logFormat()).Debug("removed SCSI location") - uvm.scsiLocations[sm.Controller][sm.LUN] = nil - return nil -} - -// AddSCSI adds a SCSI disk to a utility VM at the next available location. This -// function should be called for adding a scratch layer, a read-only layer as an -// alternative to VPMEM, or for other VHD mounts. -// -// `hostPath` is required and must point to a vhd/vhdx path. -// -// `uvmPath` is optional. If not provided, no guest request will be made -// -// `readOnly` set to `true` if the vhd/vhdx should be attached read only. -// -// `encrypted` set to `true` if the vhd/vhdx should be attached in encrypted mode. -// The device will be formatted, so this option must be used only when creating -// scratch vhd/vhdx. 
-// -// `guestOptions` is a slice that contains optional information to pass -// to the guest service -// -// `vmAccess` indicates what access to grant the vm for the hostpath -func (uvm *UtilityVM) AddSCSI( - ctx context.Context, - hostPath string, - uvmPath string, - readOnly bool, - encrypted bool, - guestOptions []string, - vmAccess VMAccessType, -) (*SCSIMount, error) { - addReq := &addSCSIRequest{ - hostPath: hostPath, - uvmPath: uvmPath, - attachmentType: "VirtualDisk", - readOnly: readOnly, - encrypted: encrypted, - guestOptions: guestOptions, - vmAccess: vmAccess, - } - return uvm.addSCSIActual(ctx, addReq) -} - -// AddSCSIPhysicalDisk attaches a physical disk from the host directly to the -// Utility VM at the next available location. -// -// `hostPath` is required and `likely` start's with `\\.\PHYSICALDRIVE`. -// -// `uvmPath` is optional if a guest mount is not requested. -// -// `readOnly` set to `true` if the physical disk should be attached read only. -// -// `guestOptions` is a slice that contains optional information to pass -// to the guest service -func (uvm *UtilityVM) AddSCSIPhysicalDisk(ctx context.Context, hostPath, uvmPath string, readOnly bool, guestOptions []string) (*SCSIMount, error) { - addReq := &addSCSIRequest{ - hostPath: hostPath, - uvmPath: uvmPath, - attachmentType: "PassThru", - readOnly: readOnly, - guestOptions: guestOptions, - vmAccess: VMAccessTypeIndividual, - } - return uvm.addSCSIActual(ctx, addReq) -} - -// AddSCSIExtensibleVirtualDisk adds an extensible virtual disk as a SCSI mount -// to the utility VM at the next available location. All such disks which are not actual virtual disks -// but provide the same SCSI interface are added to the UVM as Extensible Virtual disks. -// -// `hostPath` is required. Depending on the type of the extensible virtual disk the format of `hostPath` can -// be different. -// For example, in case of storage spaces the host path must be in the -// `evd://space/{storage_pool_unique_ID}{virtual_disk_unique_ID}` format. -// -// `uvmPath` must be provided in order to be able to use this disk in a container. -// -// `readOnly` set to `true` if the virtual disk should be attached read only. -// -// `vmAccess` indicates what access to grant the vm for the hostpath -func (uvm *UtilityVM) AddSCSIExtensibleVirtualDisk(ctx context.Context, hostPath, uvmPath string, readOnly bool) (*SCSIMount, error) { - if uvmPath == "" { - return nil, errors.New("uvmPath can not be empty for extensible virtual disk") - } - evdType, mountPath, err := ParseExtensibleVirtualDiskPath(hostPath) - if err != nil { - return nil, err - } - addReq := &addSCSIRequest{ - hostPath: mountPath, - uvmPath: uvmPath, - attachmentType: "ExtensibleVirtualDisk", - readOnly: readOnly, - guestOptions: []string{}, - vmAccess: VMAccessTypeIndividual, - evdType: evdType, - } - return uvm.addSCSIActual(ctx, addReq) -} - -// addSCSIActual is the implementation behind the external functions AddSCSI, -// AddSCSIPhysicalDisk, AddSCSIExtensibleVirtualDisk. -// -// We are in control of everything ourselves. Hence we have ref- counting and -// so-on tracking what SCSI locations are available or used. 
-// -// Returns result from calling modify with the given scsi mount -func (uvm *UtilityVM) addSCSIActual(ctx context.Context, addReq *addSCSIRequest) (_ *SCSIMount, err error) { - sm, existed, err := uvm.allocateSCSIMount( - ctx, - addReq.readOnly, - addReq.encrypted, - addReq.hostPath, - addReq.uvmPath, - addReq.attachmentType, - addReq.evdType, - addReq.vmAccess, - ) - if err != nil { - return nil, err - } - - if existed { - // another mount request might be in progress, wait for it to finish and if that operation - // fails return that error. - <-sm.waitCh - if sm.waitErr != nil { - return nil, sm.waitErr - } - return sm, nil - } - - // This is the first goroutine to add this disk, close the waitCh after we are done. - defer func() { - if err != nil { - uvm.deallocateSCSIMount(ctx, sm) - } - - // error must be set _before_ the channel is closed. - sm.waitErr = err - close(sm.waitCh) - }() - - SCSIModification := &hcsschema.ModifySettingRequest{ - RequestType: guestrequest.RequestTypeAdd, - Settings: hcsschema.Attachment{ - Path: sm.HostPath, - Type_: addReq.attachmentType, - ReadOnly: addReq.readOnly, - ExtensibleVirtualDiskType: addReq.evdType, - }, - ResourcePath: fmt.Sprintf(resourcepaths.SCSIResourceFormat, guestrequest.ScsiControllerGuids[sm.Controller], sm.LUN), - } - - if sm.UVMPath != "" { - guestReq := guestrequest.ModificationRequest{ - ResourceType: guestresource.ResourceTypeMappedVirtualDisk, - RequestType: guestrequest.RequestTypeAdd, - } - - if uvm.operatingSystem == "windows" { - guestReq.Settings = guestresource.WCOWMappedVirtualDisk{ - ContainerPath: sm.UVMPath, - Lun: sm.LUN, - } - } else { - var verity *guestresource.DeviceVerityInfo - if v, iErr := readVeritySuperBlock(ctx, sm.HostPath); iErr != nil { - log.G(ctx).WithError(iErr).WithField("hostPath", sm.HostPath).Debug("unable to read dm-verity information from VHD") - } else { - if v != nil { - log.G(ctx).WithFields(logrus.Fields{ - "hostPath": sm.HostPath, - "rootDigest": v.RootDigest, - }).Debug("adding SCSI with dm-verity") - } - verity = v - } - - guestReq.Settings = guestresource.LCOWMappedVirtualDisk{ - MountPath: sm.UVMPath, - Lun: uint8(sm.LUN), - Controller: uint8(sm.Controller), - ReadOnly: addReq.readOnly, - Encrypted: addReq.encrypted, - Options: addReq.guestOptions, - VerityInfo: verity, - } - } - SCSIModification.GuestRequest = guestReq - } - - if err := uvm.modify(ctx, SCSIModification); err != nil { - return nil, fmt.Errorf("failed to modify UVM with new SCSI mount: %s", err) - } - return sm, nil -} - -// allocateSCSIMount grants vm access to hostpath and increments the ref count of an existing scsi -// device or allocates a new one if not already present. -// Returns the resulting *SCSIMount, a bool indicating if the scsi device was already present, -// and error if any. 
-func (uvm *UtilityVM) allocateSCSIMount( - ctx context.Context, - readOnly bool, - encrypted bool, - hostPath string, - uvmPath string, - attachmentType string, - evdType string, - vmAccess VMAccessType, -) (*SCSIMount, bool, error) { - if attachmentType != "ExtensibleVirtualDisk" { - // Ensure the utility VM has access - err := grantAccess(ctx, uvm.id, hostPath, vmAccess) - if err != nil { - return nil, false, errors.Wrapf(err, "failed to grant VM access for SCSI mount") - } - } - // We must hold the lock throughout the lookup (findSCSIAttachment) until - // after the possible allocation (allocateSCSISlot) has been completed to ensure - // there isn't a race condition for it being attached by another thread between - // these two operations. - uvm.m.Lock() - defer uvm.m.Unlock() - if sm, err := uvm.findSCSIAttachment(ctx, hostPath); err == nil { - sm.refCount++ - return sm, true, nil - } - - controller, lun, err := uvm.allocateSCSISlot(ctx) - if err != nil { - return nil, false, err - } - - uvm.scsiLocations[controller][lun] = newSCSIMount( - uvm, - hostPath, - uvmPath, - attachmentType, - evdType, - 1, - controller, - int32(lun), - readOnly, - encrypted, - ) - - log.G(ctx).WithFields(uvm.scsiLocations[controller][lun].logFormat()).Debug("allocated SCSI mount") - - return uvm.scsiLocations[controller][lun], false, nil -} - -// ScratchEncryptionEnabled is a getter for `uvm.encryptScratch`. -// -// Returns true if the scratch disks should be encrypted, false otherwise. -func (uvm *UtilityVM) ScratchEncryptionEnabled() bool { - return uvm.encryptScratch -} - -// grantAccess helper function to grant access to a file for the vm or vm group -func grantAccess(ctx context.Context, uvmID string, hostPath string, vmAccess VMAccessType) error { - switch vmAccess { - case VMAccessTypeGroup: - log.G(ctx).WithField("path", hostPath).Debug("granting vm group access") - return security.GrantVmGroupAccess(hostPath) - case VMAccessTypeIndividual: - return wclayer.GrantVmAccess(ctx, uvmID, hostPath) - } - return nil -} - -// ParseExtensibleVirtualDiskPath parses the evd path provided in the config. -// extensible virtual disk path has format "evd://<evd-type>/<evd-mount-path>" -// this function parses that and returns the `evdType` and `evd-mount-path`. -func ParseExtensibleVirtualDiskPath(hostPath string) (evdType, mountPath string, err error) { - trimmedPath := strings.TrimPrefix(hostPath, "evd://") - separatorIndex := strings.Index(trimmedPath, "/") - if separatorIndex <= 0 { - return "", "", errors.Errorf("invalid extensible vhd path: %s", hostPath) - } - return trimmedPath[:separatorIndex], trimmedPath[separatorIndex+1:], nil -} diff --git a/internal/uvm/scsi/README.md b/internal/uvm/scsi/README.md new file mode 100644 index 0000000000..e823eef13e --- /dev/null +++ b/internal/uvm/scsi/README.md @@ -0,0 +1,98 @@ +# Package scsi + +This README intends to act as internal developer guidance for the package. Guidance +to consumers of the package should be included as Go doc comments in the package code +itself. + +## Terminology + +We will generally use the term "attachment" to refer to a SCSI device being made +available to a VM, so that it is visible to the guest OS. +We will generally use the term "mount" to refer to a SCSI device being mounted +to a specific path, and with specific settings (e.g. encryption) inside +a guest OS. + +## Principles + +The general principle of this package is that attachments and mounts of SCSI devices +are completely separate operations, so they should be tracked and handled separately.
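For a concrete sense of this separation, the `Manager` API described below can produce either an attachment alone or an attachment plus a guest mount from the same entry point. A minimal sketch, assuming an already-constructed `mgr *Manager`, a `ctx context.Context`, and hypothetical VHD paths:

```go
// Attach only: the disk becomes visible to the VM, but no guest OS mount is
// performed, because the *MountConfig argument is nil.
attachOnly, err := mgr.AddVirtualDisk(ctx, `C:\disks\data.vhdx`, true, nil, nil)
if err != nil {
	return err
}
defer attachOnly.Release(ctx)

// Attach and mount: a non-nil *MountConfig additionally mounts the device
// inside the guest OS; GuestPath reports the resulting mount point.
mounted, err := mgr.AddVirtualDisk(ctx, `C:\disks\scratch.vhdx`, false, nil, &MountConfig{})
if err != nil {
	return err
}
defer mounted.Release(ctx)
_ = mounted.GuestPath() // a path derived from the configured guestMountFmt
```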
+Out of a desire for convenience to the client of the package, we provide a `Manager` +type which handles them together, but under the hood attach/mount are managed by +separate components. + +## Architecture + +The package is composed of several layers of components: + +### Top level, the `Manager` type + +`Manager` is an exported type which acts as the primary entrypoint from external +consumers. It provides several methods which can be used to attach+mount a SCSI +device to a VM. + +`Add*` methods on `Manager` return a `Mount`, which serves two purposes: +- Provides information to the caller on the attachment/mount, such as controller, + LUN, and guest OS mount path. +- Tracks the resources associated with the SCSI attachment/mount, and provides a + `Release` method to clean them up. + +`Manager` itself is a fairly thin layer on top of two unexported types: `attachManager` +and `mountManager`. + +### Mid level, the `attachManager` and `mountManager` types + +These types are responsible for actually managing the state of attachments and mounts +on the VM. + +`attachManager`: +- Tracks what SCSI devices are currently attached, along with what controllers/LUNs are + used. +- When it is asked to attach a new SCSI device, it will first check if the attachment + already exists, and increase its refcount if so. If not, it will allocate a new + controller/LUN slot for the attachment, and then call the `Attacher` to actually carry + out the attach operation. +- Tracks refcount on any attached SCSI devices, so that they are not detached until there + has been a detach request for each matching attach request. + +`mountManager`: +- Tracks current SCSI devices mounted in the guest OS, and what mount options were applied. +- When it is asked to mount a new SCSI device, it will first check if the mount (with same options) + already exists, and increase its refcount if so. If not, it will track the new mount, and + call the `Mounter` to actually carry out the guest mount operation. +- Tracks refcount on any mounted SCSI devices, so that they are not unmounted until there has + been an unmount request for each matching mount request. + +### Low level, the `Attacher`, `Mounter` and `Unplugger` types + +These three are interfaces defined by the package. They provide a way for the client to +provide low-level implementations of SCSI operations, passed in when constructing the +`Manager` object. + +These types are what carry out the actual operations on HCS or the bridge, for instance, to do +a SCSI attachment or mount. + +The `Unplugger` type exists to help with removing a SCSI device from the guest before detaching. It +is used in the case of Linux containers to cleanly unplug the SCSI device. + +## Future work + +Some thoughts on how this package could evolve in the future. This is intended to inform the direction +of future changes as we work in the package. + +- The `mountManager` actually has very little to do with SCSI (at least for Linux containers, Windows + may be more complicated/limited). In fact, the only SCSI-specific part of mounting in the guest is + pretty much just identifying a block device based on the SCSI controller/LUN. It would be interesting + to separate out the SCSI controller/LUN -> block device mapping part from the rest of the guest mount + operation. This could enable us to e.g. use the same "mount" operation for SCSI and VPMEM, since they + both present a block device. +- We should not be silently and automatically scanning a SCSI device for verity info. 
Determining what + (if any) verity info to use for a device should probably be left to the client of the package. +- Likewise, ACLing disks so a VM can access them should likely fall outside the purview of the package + as well. +- The various implementations of `Attacher`, `Mounter`, and `Unplugger` should probably live outside + the package. There is no real reason for them to be defined in here, other than not having a clear + place to put them instead right now. +- For unmounting, it is awkward to have to re-pass the mount configuration to the guest again. There is + not a clear course of action if this differs from the original mount configuration, nor is this checked + anywhere. It would be good for the guest to instead track what cleanup is needed for each mount point, + and then we don't need to pass anything except the mount point in the unmount request. \ No newline at end of file diff --git a/internal/uvm/scsi/attach.go b/internal/uvm/scsi/attach.go new file mode 100644 index 0000000000..f7ed8558c3 --- /dev/null +++ b/internal/uvm/scsi/attach.go @@ -0,0 +1,155 @@ +package scsi + +import ( + "context" + "fmt" + "reflect" + "sync" +) + +type attachManager struct { + m sync.Mutex + attacher Attacher + unplugger Unplugger + numControllers int + numLUNsPerController int + slots [][]*attachment +} + +func newAttachManager(attacher Attacher, unplugger Unplugger, numControllers, numLUNsPerController int, reservedSlots []Slot) *attachManager { + slots := make([][]*attachment, numControllers) + for i := range slots { + slots[i] = make([]*attachment, numLUNsPerController) + } + for _, reservedSlot := range reservedSlots { + // Mark the slot as already filled so we don't try to re-use it. + // The nil attachConfig should mean it never matches a prospective new attach. + // The refCount of 1 should not strictly be needed, since we will never get a + // remove call for this slot, but is done for added safety.
+ slots[reservedSlot.Controller][reservedSlot.LUN] = &attachment{refCount: 1} + } + return &attachManager{ + attacher: attacher, + unplugger: unplugger, + numControllers: numControllers, + numLUNsPerController: numLUNsPerController, + slots: slots, + } +} + +type attachment struct { + controller uint + lun uint + config *attachConfig + waitErr error + waitCh chan struct{} + refCount uint +} + +type attachConfig struct { + path string + readOnly bool + typ string + evdType string +} + +func (am *attachManager) attach(ctx context.Context, c *attachConfig) (controller uint, lun uint, err error) { + att, existed, err := am.trackAttachment(c) + if err != nil { + return 0, 0, err + } + if existed { + <-att.waitCh + if att.waitErr != nil { + return 0, 0, att.waitErr + } + return att.controller, att.lun, nil + } + + defer func() { + if err != nil { + am.untrackAttachment(att, true) + } + + att.waitErr = err + close(att.waitCh) + }() + + if err := am.attacher.attach(ctx, att.controller, att.lun, att.config); err != nil { + return 0, 0, fmt.Errorf("attach %s/%s at controller %d lun %d: %w", att.config.typ, att.config.path, att.controller, att.lun, err) + } + return att.controller, att.lun, nil +} + +func (am *attachManager) detach(ctx context.Context, controller, lun uint) (bool, error) { + am.m.Lock() + defer am.m.Unlock() + + if controller >= uint(am.numControllers) || lun >= uint(am.numLUNsPerController) { + return false, fmt.Errorf("controller %d lun %d out of range", controller, lun) + } + + att := am.slots[controller][lun] + att.refCount-- + if att.refCount > 0 { + return false, nil + } + + if err := am.unplugger.unplug(ctx, controller, lun); err != nil { + return false, fmt.Errorf("unplug controller %d lun %d: %w", controller, lun, err) + } + if err := am.attacher.detach(ctx, controller, lun); err != nil { + return false, fmt.Errorf("detach controller %d lun %d: %w", controller, lun, err) + } + + am.untrackAttachment(att, false) + + return true, nil +} + +func (am *attachManager) trackAttachment(c *attachConfig) (*attachment, bool, error) { + am.m.Lock() + defer am.m.Unlock() + + var ( + freeController int = -1 + freeLUN int = -1 + ) + for controller := range am.slots { + for lun := range am.slots[controller] { + attachment := am.slots[controller][lun] + if attachment == nil { + if freeController == -1 { + freeController = controller + freeLUN = lun + } + } else if reflect.DeepEqual(c, attachment.config) { // configs are built fresh per request, so match by value, not pointer + attachment.refCount++ + return attachment, true, nil + } + } + } + + if freeController == -1 { + return nil, false, ErrNoAvailableLocation + } + + // New attachment.
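+ // The free slot is claimed while trackAttachment still holds the manager lock, so a concurrent attach cannot allocate the same controller/LUN; attach() closes waitCh once the backend attach finishes, with waitErr recording any failure for other goroutines waiting on the same attachment.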
+ attachment := &attachment{ + controller: uint(freeController), + lun: uint(freeLUN), + config: c, + refCount: 1, + waitCh: make(chan struct{}), + } + am.slots[freeController][freeLUN] = attachment + return attachment, false, nil +} + +func (am *attachManager) untrackAttachment(attachment *attachment, takeLock bool) { + if takeLock { + am.m.Lock() + defer am.m.Unlock() + } + + am.slots[attachment.controller][attachment.lun] = nil +} diff --git a/internal/uvm/scsi/backend.go b/internal/uvm/scsi/backend.go new file mode 100644 index 0000000000..3f387b1ee1 --- /dev/null +++ b/internal/uvm/scsi/backend.go @@ -0,0 +1,256 @@ +package scsi + +import ( + "context" + "errors" + "fmt" + + "github.com/Microsoft/hcsshim/internal/gcs" + "github.com/Microsoft/hcsshim/internal/hcs" + "github.com/Microsoft/hcsshim/internal/hcs/resourcepaths" + hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" + "github.com/Microsoft/hcsshim/internal/protocol/guestrequest" + "github.com/Microsoft/hcsshim/internal/protocol/guestresource" +) + +// The concrete types here (not the Attacher/Mounter/Unplugger interfaces) would be a good option +// to move out to another package eventually. There is no real reason for them to live in +// the scsi package, and it could cause cyclical dependencies in the future. + +// Attacher provides the low-level operations for attaching a SCSI device to a VM. +type Attacher interface { + attach(ctx context.Context, controller, lun uint, config *attachConfig) error + detach(ctx context.Context, controller, lun uint) error +} + +// Mounter provides the low-level operations for mounting a SCSI device inside the guest OS. +type Mounter interface { + mount(ctx context.Context, controller, lun uint, path string, config *mountConfig) error + unmount(ctx context.Context, controller, lun uint, path string, config *mountConfig) error +} + +// Unplugger provides the low-level operations for cleanly removing a SCSI device inside the guest OS. +type Unplugger interface { + unplug(ctx context.Context, controller, lun uint) error +} + +var _ Attacher = &hcsAttacher{} + +type hcsAttacher struct { + system *hcs.System +} + +// NewHCSAttacher provides an [Attacher] using a [hcs.System]. +func NewHCSAttacher(system *hcs.System) Attacher { + return &hcsAttacher{system} +} + +func (ha *hcsAttacher) attach(ctx context.Context, controller, lun uint, config *attachConfig) error { + req := &hcsschema.ModifySettingRequest{ + RequestType: guestrequest.RequestTypeAdd, + Settings: hcsschema.Attachment{ + Path: config.path, + Type_: config.typ, + ReadOnly: config.readOnly, + ExtensibleVirtualDiskType: config.evdType, + }, + ResourcePath: fmt.Sprintf(resourcepaths.SCSIResourceFormat, guestrequest.ScsiControllerGuids[controller], lun), + } + return ha.system.Modify(ctx, req) +} + +func (ha *hcsAttacher) detach(ctx context.Context, controller, lun uint) error { + req := &hcsschema.ModifySettingRequest{ + RequestType: guestrequest.RequestTypeRemove, + ResourcePath: fmt.Sprintf(resourcepaths.SCSIResourceFormat, guestrequest.ScsiControllerGuids[controller], lun), + } + return ha.system.Modify(ctx, req) +} + +var _ Mounter = &bridgeMounter{} + +type bridgeMounter struct { + gc *gcs.GuestConnection + osType string +} + +// NewBridgeMounter provides a [Mounter] using a [gcs.GuestConnection]. +// +// osType should be either "windows" or "linux". 
+func NewBridgeMounter(gc *gcs.GuestConnection, osType string) Mounter { + return &bridgeMounter{gc, osType} +} + +func (bm *bridgeMounter) mount(ctx context.Context, controller, lun uint, path string, config *mountConfig) error { + req, err := mountRequest(controller, lun, path, config, bm.osType) + if err != nil { + return err + } + return bm.gc.Modify(ctx, req) +} + +func (bm *bridgeMounter) unmount(ctx context.Context, controller, lun uint, path string, config *mountConfig) error { + req, err := unmountRequest(controller, lun, path, config, bm.osType) + if err != nil { + return err + } + return bm.gc.Modify(ctx, req) +} + +var _ Mounter = &hcsMounter{} + +type hcsMounter struct { + system *hcs.System + osType string +} + +// NewHCSMounter provides a [Mounter] using a [hcs.System]. +// +// osType should be either "windows" or "linux". +func NewHCSMounter(system *hcs.System, osType string) Mounter { + return &hcsMounter{system, osType} +} + +func (hm *hcsMounter) mount(ctx context.Context, controller, lun uint, path string, config *mountConfig) error { + req, err := mountRequest(controller, lun, path, config, hm.osType) + if err != nil { + return err + } + return hm.system.Modify(ctx, &hcsschema.ModifySettingRequest{GuestRequest: req}) +} + +func (hm *hcsMounter) unmount(ctx context.Context, controller, lun uint, path string, config *mountConfig) error { + req, err := unmountRequest(controller, lun, path, config, hm.osType) + if err != nil { + return err + } + return hm.system.Modify(ctx, &hcsschema.ModifySettingRequest{GuestRequest: req}) +} + +var _ Unplugger = &bridgeUnplugger{} + +type bridgeUnplugger struct { + gc *gcs.GuestConnection + osType string +} + +// NewBridgeUnplugger provides an [Unplugger] using a [gcs.GuestConnection]. +// +// osType should be either "windows" or "linux". +func NewBridgeUnplugger(gc *gcs.GuestConnection, osType string) Unplugger { + return &bridgeUnplugger{gc, osType} +} + +func (bu *bridgeUnplugger) unplug(ctx context.Context, controller, lun uint) error { + req, err := unplugRequest(controller, lun, bu.osType) + if err != nil { + return err + } + if req.RequestType == "" { + return nil + } + return bu.gc.Modify(ctx, req) +} + +var _ Unplugger = &hcsUnplugger{} + +type hcsUnplugger struct { + system *hcs.System + osType string +} + +// NewHCSUnplugger provides an [Unplugger] using a [hcs.System]. +// +// osType should be either "windows" or "linux". +func NewHCSUnplugger(system *hcs.System, osType string) Unplugger { + return &hcsUnplugger{system, osType} +} + +func (hu *hcsUnplugger) unplug(ctx context.Context, controller, lun uint) error { + req, err := unplugRequest(controller, lun, hu.osType) + if err != nil { + return err + } + if req.RequestType == "" { + return nil + } + return hu.system.Modify(ctx, &hcsschema.ModifySettingRequest{GuestRequest: req}) +} + +func mountRequest(controller, lun uint, path string, config *mountConfig, osType string) (guestrequest.ModificationRequest, error) { + req := guestrequest.ModificationRequest{ + ResourceType: guestresource.ResourceTypeMappedVirtualDisk, + RequestType: guestrequest.RequestTypeAdd, + } + switch osType { + case "windows": + // We don't check config.readOnly here, as that will still result in the overall attachment being read-only. 
+ if controller != 0 { + return guestrequest.ModificationRequest{}, errors.New("WCOW only supports SCSI controller 0") + } + if config.encrypted || config.verity != nil || len(config.options) != 0 { + return guestrequest.ModificationRequest{}, errors.New("WCOW does not support encrypted, verity, or guest options on mounts") + } + req.Settings = guestresource.WCOWMappedVirtualDisk{ + ContainerPath: path, + Lun: int32(lun), + } + case "linux": + req.Settings = guestresource.LCOWMappedVirtualDisk{ + MountPath: path, + Controller: uint8(controller), + Lun: uint8(lun), + ReadOnly: config.readOnly, + Encrypted: config.encrypted, + Options: config.options, + VerityInfo: config.verity, + } + default: + return guestrequest.ModificationRequest{}, fmt.Errorf("unsupported os type: %s", osType) + } + return req, nil +} + +func unmountRequest(controller, lun uint, path string, config *mountConfig, osType string) (guestrequest.ModificationRequest, error) { + req := guestrequest.ModificationRequest{ + ResourceType: guestresource.ResourceTypeMappedVirtualDisk, + RequestType: guestrequest.RequestTypeRemove, + } + switch osType { + case "windows": + req.Settings = guestresource.WCOWMappedVirtualDisk{ + ContainerPath: path, + Lun: int32(lun), + } + case "linux": + req.Settings = guestresource.LCOWMappedVirtualDisk{ + MountPath: path, + Lun: uint8(lun), + Controller: uint8(controller), + VerityInfo: config.verity, + } + default: + return guestrequest.ModificationRequest{}, fmt.Errorf("unsupported os type: %s", osType) + } + return req, nil +} + +func unplugRequest(controller, lun uint, osType string) (guestrequest.ModificationRequest, error) { + var req guestrequest.ModificationRequest + switch osType { + case "windows": + // Windows doesn't support an unplug operation, so treat as no-op. + case "linux": + req = guestrequest.ModificationRequest{ + ResourceType: guestresource.ResourceTypeSCSIDevice, + RequestType: guestrequest.RequestTypeRemove, + Settings: guestresource.SCSIDevice{ + Controller: uint8(controller), + Lun: uint8(lun), + }, + } + default: + return guestrequest.ModificationRequest{}, fmt.Errorf("unsupported os type: %s", osType) + } + return req, nil +} diff --git a/internal/uvm/scsi/doc.go b/internal/uvm/scsi/doc.go new file mode 100644 index 0000000000..cb8b2d2bb8 --- /dev/null +++ b/internal/uvm/scsi/doc.go @@ -0,0 +1,6 @@ +// Package scsi handles SCSI device attachment and mounting for VMs. +// The primary entrypoint to the package is [Manager]. +// +// The backend implementation of working with disks for a given VM is +// provided by the interfaces [Attacher], [Mounter], and [Unplugger]. +package scsi diff --git a/internal/uvm/scsi/manager.go b/internal/uvm/scsi/manager.go new file mode 100644 index 0000000000..e2c25d5eb6 --- /dev/null +++ b/internal/uvm/scsi/manager.go @@ -0,0 +1,322 @@ +package scsi + +import ( + "context" + "errors" + "fmt" + "strings" + "sync" + + "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/protocol/guestresource" + "github.com/Microsoft/hcsshim/internal/security" + "github.com/Microsoft/hcsshim/internal/verity" + "github.com/Microsoft/hcsshim/internal/wclayer" + "github.com/sirupsen/logrus" +) + +var ( + // ErrNoAvailableLocation indicates that a new SCSI attachment failed because + // no new slots were available. + ErrNoAvailableLocation = errors.New("no available location") + // ErrNotInitialized is returned when a method is invoked on a nil [Manager].
+ ErrNotInitialized = errors.New("SCSI manager not initialized") + // ErrAlreadyReleased is returned when [Mount.Release] is called on a Mount + // that had already been released. + ErrAlreadyReleased = errors.New("mount was already released") +) + +// Manager is the primary entrypoint for managing SCSI devices on a VM. +// It tracks the state of what devices have been attached to the VM, and +// mounted inside the guest OS. +type Manager struct { + attachManager *attachManager + mountManager *mountManager +} + +// Slot represents a single SCSI slot, consisting of a controller and LUN. +type Slot struct { + Controller uint + LUN uint +} + +// NewManager creates a new Manager using the provided backend [Attacher], +// [Mounter], and [Unplugger], as well as other configuration parameters. +// +// guestMountFmt is the format string to use for mounts of SCSI devices in +// the guest OS. It should have a single %d format parameter. +// +// reservedSlots indicates which SCSI slots to treat as already used. They +// will not be handed out again by the Manager. +func NewManager( + attacher Attacher, + mounter Mounter, + unplugger Unplugger, + numControllers int, + numLUNsPerController int, + guestMountFmt string, + reservedSlots []Slot, +) *Manager { + am := newAttachManager(attacher, unplugger, numControllers, numLUNsPerController, reservedSlots) + mm := newMountManager(mounter, guestMountFmt) + return &Manager{am, mm} +} + +// MountConfig specifies the options to apply for mounting a SCSI device in +// the guest OS. +type MountConfig struct { + Encrypted bool + Options []string +} + +// Mount represents a SCSI device that has been attached to a VM, and potentially +// also mounted into the guest OS. +type Mount struct { + mgr *Manager + controller uint + lun uint + guestPath string + releaseOnce sync.Once +} + +// Controller returns the controller number that the SCSI device is attached to. +func (m *Mount) Controller() uint { + return m.controller +} + +// LUN returns the LUN number that the SCSI device is attached to. +func (m *Mount) LUN() uint { + return m.lun +} + +// GuestPath returns the path inside the guest OS where the SCSI device was mounted. +// Will return an empty string if no guest mount was performed. +func (m *Mount) GuestPath() string { + return m.guestPath +} + +// Release releases the SCSI mount. Refcount tracking is used in case multiple instances +// of the same attachment or mount are used. If the refcount for the guest OS mount +// reaches 0, the guest OS mount is removed. If the refcount for the SCSI attachment +// reaches 0, the SCSI attachment is removed. +func (m *Mount) Release(ctx context.Context) (err error) { + err = ErrAlreadyReleased + m.releaseOnce.Do(func() { + err = m.mgr.remove(ctx, m.controller, m.lun, m.guestPath) + }) + return +} + +// AddVirtualDisk attaches and mounts a VHD on the host to the VM. If the same +// VHD has already been attached to the VM, the existing attachment will +// be reused. If the same VHD has already been mounted in the guest OS +// with the same MountConfig, the same mount will be reused. +// +// vmAccess determines what ACL to apply to the VHD before it is attached. +// If nil, no ACL is applied. +// +// mc determines the settings to apply on the guest OS mount. If +// it is nil, no guest OS mount is performed.
+func (m *Manager) AddVirtualDisk(
+	ctx context.Context,
+	hostPath string,
+	readOnly bool,
+	vmAccess VMAccessGranter,
+	mc *MountConfig,
+) (*Mount, error) {
+	if m == nil {
+		return nil, ErrNotInitialized
+	}
+	if vmAccess != nil {
+		if err := vmAccess(ctx, hostPath); err != nil {
+			return nil, err
+		}
+	}
+	var mcInternal *mountConfig
+	if mc != nil {
+		mcInternal = &mountConfig{
+			readOnly:  readOnly,
+			encrypted: mc.Encrypted,
+			options:   mc.Options,
+			verity:    readVerityInfo(ctx, hostPath),
+		}
+	}
+	return m.add(ctx,
+		&attachConfig{
+			path:     hostPath,
+			readOnly: readOnly,
+			typ:      "VirtualDisk",
+		},
+		mcInternal)
+}
+
+// AddPhysicalDisk attaches and mounts a physical disk on the host to the VM.
+// If the same physical disk has already been attached to the VM, the existing
+// attachment will be reused. If the same physical disk has already been mounted
+// in the guest OS with the same MountConfig, the same mount will be reused.
+//
+// vmAccess determines what ACL to apply to the physical disk before it is attached.
+// If nil, no ACL is applied.
+//
+// mc determines the settings to apply on the guest OS mount. If
+// it is nil, no guest OS mount is performed.
+func (m *Manager) AddPhysicalDisk(
+	ctx context.Context,
+	hostPath string,
+	readOnly bool,
+	vmAccess VMAccessGranter,
+	mc *MountConfig,
+) (*Mount, error) {
+	if m == nil {
+		return nil, ErrNotInitialized
+	}
+	if vmAccess != nil {
+		if err := vmAccess(ctx, hostPath); err != nil {
+			return nil, err
+		}
+	}
+	var mcInternal *mountConfig
+	if mc != nil {
+		mcInternal = &mountConfig{
+			readOnly:  readOnly,
+			encrypted: mc.Encrypted,
+			options:   mc.Options,
+			verity:    readVerityInfo(ctx, hostPath),
+		}
+	}
+	return m.add(ctx,
+		&attachConfig{
+			path:     hostPath,
+			readOnly: readOnly,
+			typ:      "PassThru",
+		},
+		mcInternal)
+}
+
+// AddExtensibleVirtualDisk attaches and mounts an extensible virtual disk (EVD) to the VM.
+// EVDs are made available by special drivers on the host which interact with the Hyper-V
+// synthetic SCSI stack.
+// If the same EVD has already been attached to the VM, the existing
+// attachment will be reused. If the same EVD has already been mounted
+// in the guest OS with the same MountConfig, the same mount will be reused.
+//
+// hostPath must adhere to the format "evd://<evdType>/<evd-mount-path>".
+//
+// mc determines the settings to apply on the guest OS mount. If
+// it is nil, no guest OS mount is performed.
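+//
+// A hypothetical call (the evd type and path are illustrative only):
+//
+//	mount, err := mgr.AddExtensibleVirtualDisk(ctx, "evd://space/data/disk0", true, nil)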
+func (m *Manager) AddExtensibleVirtualDisk(
+	ctx context.Context,
+	hostPath string,
+	readOnly bool,
+	mc *MountConfig,
+) (*Mount, error) {
+	if m == nil {
+		return nil, ErrNotInitialized
+	}
+	evdType, mountPath, err := parseExtensibleVirtualDiskPath(hostPath)
+	if err != nil {
+		return nil, err
+	}
+	var mcInternal *mountConfig
+	if mc != nil {
+		mcInternal = &mountConfig{
+			readOnly:  readOnly,
+			encrypted: mc.Encrypted,
+			options:   mc.Options,
+		}
+	}
+	return m.add(ctx,
+		&attachConfig{
+			path:     mountPath,
+			readOnly: readOnly,
+			typ:      "ExtensibleVirtualDisk",
+			evdType:  evdType,
+		},
+		mcInternal)
+}
+
+func (m *Manager) add(ctx context.Context, attachConfig *attachConfig, mountConfig *mountConfig) (_ *Mount, err error) {
+	controller, lun, err := m.attachManager.attach(ctx, attachConfig)
+	if err != nil {
+		return nil, err
+	}
+	defer func() {
+		if err != nil {
+			m.attachManager.detach(ctx, controller, lun)
+		}
+	}()
+
+	var guestPath string
+	if mountConfig != nil {
+		guestPath, err = m.mountManager.mount(ctx, controller, lun, mountConfig)
+		if err != nil {
+			return nil, err
+		}
+	}
+
+	return &Mount{mgr: m, controller: controller, lun: lun, guestPath: guestPath}, nil
+}
+
+func (m *Manager) remove(ctx context.Context, controller, lun uint, guestPath string) error {
+	if guestPath != "" {
+		removed, err := m.mountManager.unmount(ctx, guestPath)
+		if err != nil {
+			return err
+		}
+
+		if !removed {
+			return nil
+		}
+	}
+
+	if _, err := m.attachManager.detach(ctx, controller, lun); err != nil {
+		return err
+	}
+
+	return nil
+}
+
+func readVerityInfo(ctx context.Context, path string) *guestresource.DeviceVerityInfo {
+	if v, iErr := verity.ReadVeritySuperBlock(ctx, path); iErr != nil {
+		log.G(ctx).WithError(iErr).WithField("hostPath", path).Debug("unable to read dm-verity information from VHD")
+	} else {
+		if v != nil {
+			log.G(ctx).WithFields(logrus.Fields{
+				"hostPath":   path,
+				"rootDigest": v.RootDigest,
+			}).Debug("adding SCSI with dm-verity")
+		}
+		return v
+	}
+	return nil
+}
+
+// VMAccessGranter controls what ACL is applied to the device before it is attached.
+type VMAccessGranter func(ctx context.Context, path string) error
+
+// IndividualVMAccess gives the specified VM access to the path.
+func IndividualVMAccess(vmID string) VMAccessGranter {
+	return func(ctx context.Context, path string) error {
+		return wclayer.GrantVmAccess(ctx, vmID, path)
+	}
+}
+
+// GroupVMAccess gives the VM group SID access to the path.
+func GroupVMAccess() VMAccessGranter {
+	return func(ctx context.Context, path string) error {
+		log.G(ctx).WithField("path", path).Debug("granting vm group access")
+		return security.GrantVmGroupAccess(path)
+	}
+}
+
+// parseExtensibleVirtualDiskPath parses the evd path provided in the config.
+// An extensible virtual disk path has the format "evd://<evdType>/<evd-mount-path>";
+// this function parses that and returns the `evdType` and `evd-mount-path`.
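+// For example, an input of "evd://space/my/disk/path" (illustrative) yields
+// evdType "space" and mountPath "my/disk/path".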
+func parseExtensibleVirtualDiskPath(hostPath string) (evdType, mountPath string, err error) {
+	trimmedPath := strings.TrimPrefix(hostPath, "evd://")
+	separatorIndex := strings.Index(trimmedPath, "/")
+	if separatorIndex <= 0 {
+		return "", "", fmt.Errorf("invalid extensible vhd path: %s", hostPath)
+	}
+	return trimmedPath[:separatorIndex], trimmedPath[separatorIndex+1:], nil
+}
diff --git a/internal/uvm/scsi/mount.go b/internal/uvm/scsi/mount.go
new file mode 100644
index 0000000000..0f61148c5d
--- /dev/null
+++ b/internal/uvm/scsi/mount.go
@@ -0,0 +1,165 @@
+package scsi
+
+import (
+	"context"
+	"fmt"
+	"sync"
+
+	"github.com/Microsoft/hcsshim/internal/protocol/guestresource"
+)
+
+type mountManager struct {
+	m        sync.Mutex
+	mounter  Mounter
+	mounts   []*mount
+	mountFmt string
+	mountNum uint
+}
+
+func newMountManager(mounter Mounter, mountFmt string) *mountManager {
+	return &mountManager{
+		mounter:  mounter,
+		mountFmt: mountFmt,
+	}
+}
+
+type mount struct {
+	path       string
+	index      int
+	controller uint
+	lun        uint
+	config     *mountConfig
+	waitErr    error
+	waitCh     chan struct{}
+	refCount   uint
+}
+
+type mountConfig struct {
+	readOnly  bool
+	encrypted bool
+	verity    *guestresource.DeviceVerityInfo
+	options   []string
+}
+
+func mountConfigEquals(a, b *mountConfig) bool {
+	return a.readOnly == b.readOnly &&
+		a.encrypted == b.encrypted &&
+		verityEquals(a.verity, b.verity) &&
+		strSliceEquals(a.options, b.options)
+}
+
+// verityEquals compares two verity configs, treating nil as equal only to nil.
+// Dereferencing unconditionally would panic when either side has no verity info.
+func verityEquals(a, b *guestresource.DeviceVerityInfo) bool {
+	if a == nil || b == nil {
+		return a == b
+	}
+	return *a == *b
+}
+
+func strSliceEquals(a, b []string) bool {
+	if a == nil || b == nil {
+		return a == nil && b == nil
+	}
+	if len(a) != len(b) {
+		return false
+	}
+	for i := range a {
+		if a[i] != b[i] {
+			return false
+		}
+	}
+	return true
+}
+
+func (mm *mountManager) mount(ctx context.Context, controller, lun uint, c *mountConfig) (_ string, err error) {
+	mount, existed := mm.trackMount(controller, lun, c)
+	if existed {
+		<-mount.waitCh
+		if mount.waitErr != nil {
+			return "", mount.waitErr
+		}
+		return mount.path, nil
+	}
+
+	defer func() {
+		if err != nil {
+			mm.untrackMount(mount, true)
+		}
+
+		mount.waitErr = err
+		close(mount.waitCh)
+	}()
+
+	if err := mm.mounter.mount(ctx, controller, lun, mount.path, c); err != nil {
+		return "", fmt.Errorf("mount scsi controller %d lun %d at %s: %w", controller, lun, mount.path, err)
+	}
+	return mount.path, nil
+}
+
+func (mm *mountManager) unmount(ctx context.Context, path string) (bool, error) {
+	mm.m.Lock()
+	defer mm.m.Unlock()
+
+	var mount *mount
+	for _, m := range mm.mounts {
+		// Entries for released mounts are nil, so skip them while searching.
+		if m != nil && m.path == path {
+			mount = m
+			break
+		}
+	}
+	if mount == nil {
+		return false, fmt.Errorf("no mount found for path %s", path)
+	}
+
+	mount.refCount--
+	if mount.refCount > 0 {
+		return false, nil
+	}
+
+	if err := mm.mounter.unmount(ctx, mount.controller, mount.lun, mount.path, mount.config); err != nil {
+		return false, fmt.Errorf("unmount scsi controller %d lun %d at path %s: %w", mount.controller, mount.lun, mount.path, err)
+	}
+	mm.untrackMount(mount, false)
+
+	return true, nil
+}
+
+func (mm *mountManager) trackMount(controller, lun uint, c *mountConfig) (*mount, bool) {
+	mm.m.Lock()
+	defer mm.m.Unlock()
+
+	var freeIndex int = -1
+	for i, mount := range mm.mounts {
+		if mount == nil {
+			if freeIndex == -1 {
+				freeIndex = i
+			}
+		} else if controller == mount.controller &&
+			lun == mount.lun &&
+			mountConfigEquals(c, mount.config) {
+
+			mount.refCount++
+			return mount, true
+		}
+	}
+
+	// New mount.
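+	// A free (nil) slot left behind by untrackMount is reused when available;
+	// otherwise the slice grows. The chosen index is recorded on the mount so
+	// untrackMount can clear exactly that slot later.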
+	mount := &mount{
+		path:       mm.mountPath(),
+		controller: controller,
+		lun:        lun,
+		config:     c,
+		refCount:   1,
+		waitCh:     make(chan struct{}),
+	}
+	if freeIndex == -1 {
+		mount.index = len(mm.mounts)
+		mm.mounts = append(mm.mounts, mount)
+	} else {
+		mount.index = freeIndex
+		mm.mounts[freeIndex] = mount
+	}
+	return mount, false
+}
+
+func (mm *mountManager) untrackMount(mount *mount, takeLock bool) {
+	if takeLock {
+		mm.m.Lock()
+		defer mm.m.Unlock()
+	}
+
+	mm.mounts[mount.index] = nil
+}
+
+func (mm *mountManager) mountPath() string {
+	mountNum := mm.mountNum
+	mm.mountNum++
+	return fmt.Sprintf(mm.mountFmt, mountNum)
+}
diff --git a/internal/uvm/start.go b/internal/uvm/start.go
index 4a77b3b2d0..1a0d90babe 100644
--- a/internal/uvm/start.go
+++ b/internal/uvm/start.go
@@ -18,6 +18,7 @@ import (
 	"golang.org/x/sys/windows"
 
 	"github.com/Microsoft/hcsshim/internal/gcs"
+	"github.com/Microsoft/hcsshim/internal/guestpath"
 	"github.com/Microsoft/hcsshim/internal/hcs"
 	"github.com/Microsoft/hcsshim/internal/hcs/schema1"
 	hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2"
@@ -25,6 +26,7 @@ import (
 	"github.com/Microsoft/hcsshim/internal/logfields"
 	"github.com/Microsoft/hcsshim/internal/protocol/guestrequest"
 	"github.com/Microsoft/hcsshim/internal/protocol/guestresource"
+	"github.com/Microsoft/hcsshim/internal/uvm/scsi"
 )
 
 // entropyBytes is the number of bytes of random data to send to a Linux UVM
@@ -281,6 +283,31 @@ func (uvm *UtilityVM) Start(ctx context.Context) (err error) {
 		uvm.protocol = properties.GuestConnectionInfo.ProtocolVersion
 	}
 
+	// Initialize the SCSIManager.
+	var (
+		mounter   scsi.Mounter
+		unplugger scsi.Unplugger
+	)
+	if uvm.gc != nil {
+		mounter = scsi.NewBridgeMounter(uvm.gc, uvm.OS())
+		unplugger = scsi.NewBridgeUnplugger(uvm.gc, uvm.OS())
+	} else {
+		mounter = scsi.NewHCSMounter(uvm.hcsSystem, uvm.OS())
+		unplugger = scsi.NewHCSUnplugger(uvm.hcsSystem, uvm.OS())
+	}
+	guestMountFmt := guestpath.WCOWGlobalMountPrefixFmt
+	if uvm.OS() == "linux" {
+		guestMountFmt = guestpath.LCOWGlobalMountPrefixFmt
+	}
+	uvm.SCSIManager = scsi.NewManager(
+		scsi.NewHCSAttacher(uvm.hcsSystem),
+		mounter,
+		unplugger,
+		int(uvm.scsiControllerCount),
+		64, // LUNs per controller, fixed by Hyper-V.
+		guestMountFmt,
+		uvm.reservedSCSISlots)
+
 	if uvm.confidentialUVMOptions != nil && uvm.OS() == "linux" {
 		copts := []ConfidentialUVMOpt{
 			WithSecurityPolicy(uvm.confidentialUVMOptions.SecurityPolicy),
diff --git a/internal/uvm/types.go b/internal/uvm/types.go
index e925fe4cdc..392e9b55d9 100644
--- a/internal/uvm/types.go
+++ b/internal/uvm/types.go
@@ -13,6 +13,7 @@ import (
 	"github.com/Microsoft/hcsshim/internal/hcs"
 	"github.com/Microsoft/hcsshim/internal/hcs/schema1"
 	"github.com/Microsoft/hcsshim/internal/hns"
+	"github.com/Microsoft/hcsshim/internal/uvm/scsi"
 )
 
 // | WCOW | LCOW
@@ -87,11 +88,12 @@ type UtilityVM struct {
 	vpmemDevicesMultiMapped [MaxVPMEMCount]*vPMemInfoMulti
 
 	// SCSI devices that are mapped into a Windows or Linux utility VM
-	scsiLocations [4][64]*SCSIMount // Hyper-V supports 4 controllers, 64 slots per controller. Limited to 1 controller for now though.
-	scsiControllerCount uint32 // Number of SCSI controllers in the utility VM
-	encryptScratch      bool   // Enable scratch encryption
+	SCSIManager         *scsi.Manager
+	scsiControllerCount uint32 // Number of SCSI controllers in the utility VM
+	reservedSCSISlots   []scsi.Slot
 
-	vpciDevices map[VPCIDeviceKey]*VPCIDevice // map of device instance id to vpci device
+	encryptScratch bool                          // Enable scratch encryption
+	vpciDevices    map[VPCIDeviceKey]*VPCIDevice // map of device instance id to vpci device
 
 	// Plan9 are directories mapped into a Linux utility VM
 	plan9Counter uint64 // Each newly-added plan9 share has a counter used as its ID in the ResourceURI and for the name
@@ -140,3 +142,7 @@ type UtilityVM struct {
 	// confidentialUVMOptions hold confidential UVM specific options
 	confidentialUVMOptions *ConfidentialOptions
 }
+
+func (uvm *UtilityVM) ScratchEncryptionEnabled() bool {
+	return uvm.encryptScratch
+}
diff --git a/internal/uvm/vpmem.go b/internal/uvm/vpmem.go
index 4d0d50d963..4adcae1621 100644
--- a/internal/uvm/vpmem.go
+++ b/internal/uvm/vpmem.go
@@ -10,13 +10,12 @@ import (
 	"github.com/pkg/errors"
 	"github.com/sirupsen/logrus"
 
-	"github.com/Microsoft/hcsshim/ext4/dmverity"
-	"github.com/Microsoft/hcsshim/ext4/tar2ext4"
 	"github.com/Microsoft/hcsshim/internal/hcs/resourcepaths"
 	hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2"
 	"github.com/Microsoft/hcsshim/internal/log"
 	"github.com/Microsoft/hcsshim/internal/protocol/guestrequest"
 	"github.com/Microsoft/hcsshim/internal/protocol/guestresource"
+	"github.com/Microsoft/hcsshim/internal/verity"
 )
 
 const (
@@ -26,7 +25,9 @@ const (
 var (
 	// ErrMaxVPMemLayerSize is the error returned when the size of `hostPath` is
 	// greater than the max vPMem layer size set at create time.
-	ErrMaxVPMemLayerSize = errors.New("layer size is to large for VPMEM max size")
+	ErrMaxVPMemLayerSize   = errors.New("layer size is too large for VPMEM max size")
+	ErrNoAvailableLocation = errors.New("no available location")
+	ErrNotAttached         = errors.New("not attached")
 )
 
 // var _ resources.ResourceCloser = &VPMEMMount{} -- Causes an import cycle.
@@ -55,51 +56,6 @@ func newDefaultVPMemInfo(hostPath, uvmPath string) *vPMemInfoDefault {
 	}
 }
 
-// fileSystemSize retrieves ext4 fs SuperBlock and returns the file system size and block size
-func fileSystemSize(vhdPath string) (int64, int, error) {
-	sb, err := tar2ext4.ReadExt4SuperBlock(vhdPath)
-	if err != nil {
-		return 0, 0, errors.Wrap(err, "failed to read ext4 super block")
-	}
-	blockSize := 1024 * (1 << sb.LogBlockSize)
-	fsSize := int64(blockSize) * int64(sb.BlocksCountLow)
-	return fsSize, blockSize, nil
-}
-
-// readVeritySuperBlock reads ext4 super block for a given VHD to then further read the dm-verity super block
-// and root hash
-func readVeritySuperBlock(ctx context.Context, layerPath string) (*guestresource.DeviceVerityInfo, error) {
-	// dm-verity information is expected to be appended, the size of ext4 data will be the offset
-	// of the dm-verity super block, followed by merkle hash tree
-	ext4SizeInBytes, ext4BlockSize, err := fileSystemSize(layerPath)
-	if err != nil {
-		return nil, err
-	}
-
-	dmvsb, err := dmverity.ReadDMVerityInfo(layerPath, ext4SizeInBytes)
-	if err != nil {
-		return nil, errors.Wrap(err, "failed to read dm-verity super block")
-	}
-	log.G(ctx).WithFields(logrus.Fields{
-		"layerPath":     layerPath,
-		"rootHash":      dmvsb.RootDigest,
-		"algorithm":     dmvsb.Algorithm,
-		"salt":          dmvsb.Salt,
-		"dataBlocks":    dmvsb.DataBlocks,
-		"dataBlockSize": dmvsb.DataBlockSize,
-	}).Debug("dm-verity information")
-
-	return &guestresource.DeviceVerityInfo{
-		Ext4SizeInBytes: ext4SizeInBytes,
-		BlockSize:       ext4BlockSize,
-		RootDigest:      dmvsb.RootDigest,
-		Algorithm:       dmvsb.Algorithm,
-		Salt:            dmvsb.Salt,
-		Version:         int(dmvsb.Version),
-		SuperBlock:      true,
-	}, nil
-}
-
 // findNextVPMemSlot finds next available VPMem slot.
 //
 // Lock MUST be held when calling this function.
@@ -171,7 +127,7 @@ func (uvm *UtilityVM) addVPMemDefault(ctx context.Context, hostPath string) (_ s
 		DeviceNumber: deviceNumber,
 		MountPath:    uvmPath,
 	}
-	if v, iErr := readVeritySuperBlock(ctx, hostPath); iErr != nil {
+	if v, iErr := verity.ReadVeritySuperBlock(ctx, hostPath); iErr != nil {
 		log.G(ctx).WithError(iErr).WithField("hostPath", hostPath).Debug("unable to read dm-verity information from VHD")
 	} else {
 		if v != nil {
@@ -211,13 +167,12 @@ func (uvm *UtilityVM) removeVPMemDefault(ctx context.Context, hostPath string) e
 		return nil
 	}
 
-	var verity *guestresource.DeviceVerityInfo
-	if v, _ := readVeritySuperBlock(ctx, hostPath); v != nil {
+	verity, _ := verity.ReadVeritySuperBlock(ctx, hostPath)
+	if verity != nil {
 		log.G(ctx).WithFields(logrus.Fields{
 			"hostPath":   hostPath,
-			"rootDigest": v.RootDigest,
+			"rootDigest": verity.RootDigest,
 		}).Debug("removing VPMem with dm-verity")
-		verity = v
 	}
 	modification := &hcsschema.ModifySettingRequest{
 		RequestType: guestrequest.RequestTypeRemove,
diff --git a/internal/uvm/vpmem_mapped.go b/internal/uvm/vpmem_mapped.go
index 0cb2230dab..191644a8f4 100644
--- a/internal/uvm/vpmem_mapped.go
+++ b/internal/uvm/vpmem_mapped.go
@@ -16,6 +16,7 @@ import (
 	"github.com/Microsoft/hcsshim/internal/memory"
 	"github.com/Microsoft/hcsshim/internal/protocol/guestrequest"
 	"github.com/Microsoft/hcsshim/internal/protocol/guestresource"
+	"github.com/Microsoft/hcsshim/internal/verity"
 )
 
 const (
@@ -84,7 +85,7 @@ func newMappedVPMemModifyRequest(
 		},
 	}
 
-	if verity, err := readVeritySuperBlock(ctx, md.hostPath); err != nil {
+	if verity, err := verity.ReadVeritySuperBlock(ctx, md.hostPath); err != nil {
 		log.G(ctx).WithError(err).WithField("hostPath", md.hostPath).Debug("unable to read dm-verity information from VHD")
 	} else {
 		log.G(ctx).WithFields(logrus.Fields{
diff --git a/internal/verity/verity.go b/internal/verity/verity.go
new file mode 100644
index 0000000000..7aef0ce65e
--- /dev/null
+++ b/internal/verity/verity.go
@@ -0,0 +1,57 @@
+package verity
+
+import (
+	"context"
+
+	"github.com/Microsoft/hcsshim/ext4/dmverity"
+	"github.com/Microsoft/hcsshim/ext4/tar2ext4"
+	"github.com/Microsoft/hcsshim/internal/log"
+	"github.com/Microsoft/hcsshim/internal/protocol/guestresource"
+	"github.com/pkg/errors"
+	"github.com/sirupsen/logrus"
+)
+
+// fileSystemSize retrieves ext4 fs SuperBlock and returns the file system size and block size
+func fileSystemSize(vhdPath string) (int64, int, error) {
+	sb, err := tar2ext4.ReadExt4SuperBlock(vhdPath)
+	if err != nil {
+		return 0, 0, errors.Wrap(err, "failed to read ext4 super block")
+	}
+	blockSize := 1024 * (1 << sb.LogBlockSize)
+	fsSize := int64(blockSize) * int64(sb.BlocksCountLow)
+	return fsSize, blockSize, nil
+}
+
+// ReadVeritySuperBlock reads ext4 super block for a given VHD to then further read the dm-verity super block
+// and root hash
+func ReadVeritySuperBlock(ctx context.Context, layerPath string) (*guestresource.DeviceVerityInfo, error) {
+	// dm-verity information is expected to be appended, the size of ext4 data will be the offset
+	// of the dm-verity super block, followed by merkle hash tree
+	ext4SizeInBytes, ext4BlockSize, err := fileSystemSize(layerPath)
+	if err != nil {
+		return nil, err
+	}
+
+	dmvsb, err := dmverity.ReadDMVerityInfo(layerPath, ext4SizeInBytes)
+	if err != nil {
+		return nil, errors.Wrap(err, "failed to read dm-verity super block")
+	}
+	log.G(ctx).WithFields(logrus.Fields{
+		"layerPath":     layerPath,
+		"rootHash":      dmvsb.RootDigest,
+		"algorithm":     dmvsb.Algorithm,
+		"salt":          dmvsb.Salt,
+		"dataBlocks":    dmvsb.DataBlocks,
+		"dataBlockSize": dmvsb.DataBlockSize,
+	}).Debug("dm-verity information")
+
+	return &guestresource.DeviceVerityInfo{
+		Ext4SizeInBytes: ext4SizeInBytes,
+		BlockSize:       ext4BlockSize,
+		RootDigest:      dmvsb.RootDigest,
+		Algorithm:       dmvsb.Algorithm,
+		Salt:            dmvsb.Salt,
+		Version:         int(dmvsb.Version),
+		SuperBlock:      true,
+	}, nil
+}
diff --git a/test/functional/lcow_test.go b/test/functional/lcow_test.go
index b2895d6a65..58991bc34e 100644
--- a/test/functional/lcow_test.go
+++ b/test/functional/lcow_test.go
@@ -20,6 +20,7 @@ import (
 	"github.com/Microsoft/hcsshim/internal/lcow"
 	"github.com/Microsoft/hcsshim/internal/resources"
 	"github.com/Microsoft/hcsshim/internal/uvm"
+	"github.com/Microsoft/hcsshim/internal/uvm/scsi"
 	"github.com/Microsoft/hcsshim/osversion"
 
 	testutilities "github.com/Microsoft/hcsshim/test/internal"
@@ -169,7 +170,7 @@ func TestLCOWSimplePodScenario(t *testing.T) {
 	cacheDir := t.TempDir()
 	cacheFile := filepath.Join(cacheDir, "cache.vhdx")
 
-	// This is what gets mounted into /tmp/scratch
+	// This is what gets mounted for UVM scratch
 	uvmScratchDir := t.TempDir()
 	uvmScratchFile := filepath.Join(uvmScratchDir, "uvmscratch.vhdx")
 
@@ -184,13 +185,13 @@ func TestLCOWSimplePodScenario(t *testing.T) {
 	lcowUVM := testuvm.CreateAndStartLCOW(context.Background(), t, "uvm")
 	defer lcowUVM.Close()
 
-	// Populate the cache and generate the scratch file for /tmp/scratch
+	// Populate the cache and generate the scratch file
 	if err := lcow.CreateScratch(context.Background(), lcowUVM, uvmScratchFile, lcow.DefaultScratchSizeGB, cacheFile); err != nil {
 		t.Fatal(err)
 	}
 
-	var options []string
-	if _, err := lcowUVM.AddSCSI(context.Background(), uvmScratchFile, `/tmp/scratch`, false, false, options, uvm.VMAccessTypeIndividual); err != nil {
+	_, err := lcowUVM.SCSIManager.AddVirtualDisk(context.Background(), uvmScratchFile, false, scsi.IndividualVMAccess(lcowUVM.ID()), &scsi.MountConfig{})
+	if err != nil {
 		t.Fatal(err)
 	}
 
diff --git a/test/functional/uvm_scratch_test.go b/test/functional/uvm_scratch_test.go
index 5787595642..2184250e39 100644
--- a/test/functional/uvm_scratch_test.go
+++ b/test/functional/uvm_scratch_test.go
@@ -11,7 +11,7 @@ import (
 	"testing"
 
 	"github.com/Microsoft/hcsshim/internal/lcow"
-	"github.com/Microsoft/hcsshim/internal/uvm"
+	"github.com/Microsoft/hcsshim/internal/uvm/scsi"
 	"github.com/Microsoft/hcsshim/osversion"
 	"github.com/Microsoft/hcsshim/test/pkg/require"
 	tuvm "github.com/Microsoft/hcsshim/test/pkg/uvm"
@@ -50,12 +50,11 @@ func TestScratchCreateLCOW(t *testing.T) {
 	}
 
 	// Make sure it can be added (verifies it has access correctly)
-	var options []string
-	scsiMount, err := targetUVM.AddSCSI(context.Background(), destTwo, "", false, false, options, uvm.VMAccessTypeIndividual)
+	scsiMount, err := targetUVM.SCSIManager.AddVirtualDisk(context.Background(), destTwo, false, scsi.IndividualVMAccess(targetUVM.ID()), nil)
 	if err != nil {
 		t.Fatal(err)
 	}
-	if scsiMount.Controller != 0 && scsiMount.LUN != 0 {
-		t.Fatal(err)
-	}
+	if scsiMount.Controller() != 0 || scsiMount.LUN() != 0 {
+		t.Fatalf("expected controller 0 and LUN 0, got controller %d LUN %d", scsiMount.Controller(), scsiMount.LUN())
+	}
 	// TODO Could consider giving it a host path and verifying it's contents somehow
diff --git a/test/functional/uvm_scsi_test.go b/test/functional/uvm_scsi_test.go
index 737213657c..6ea7cb0bb8 100644
--- a/test/functional/uvm_scsi_test.go
+++ b/test/functional/uvm_scsi_test.go
@@ -18,6 +18,7 @@ import (
 
 	"github.com/Microsoft/hcsshim/internal/lcow"
 	"github.com/Microsoft/hcsshim/internal/uvm"
+	"github.com/Microsoft/hcsshim/internal/uvm/scsi"
"github.com/Microsoft/hcsshim/osversion" testutilities "github.com/Microsoft/hcsshim/test/internal" "github.com/Microsoft/hcsshim/test/pkg/require" @@ -54,27 +55,25 @@ func TestSCSIAddRemoveWCOW(t *testing.T) { testSCSIAddRemoveSingle(t, u, `c:\`, "windows", layers) } -func testAddSCSI(u *uvm.UtilityVM, disks []string, pathPrefix string, usePath bool, reAdd bool) error { +func testAddSCSI(u *uvm.UtilityVM, disks []string, attachOnly bool) ([]*scsi.Mount, error) { + mounts := make([]*scsi.Mount, 0, len(disks)) for i := range disks { - uvmPath := "" - if usePath { - uvmPath = fmt.Sprintf(`%s%d`, pathPrefix, i) + var mc *scsi.MountConfig + if !attachOnly { + mc = &scsi.MountConfig{} } - var options []string - scsiMount, err := u.AddSCSI(context.Background(), disks[i], uvmPath, false, false, options, uvm.VMAccessTypeIndividual) + scsiMount, err := u.SCSIManager.AddVirtualDisk(context.Background(), disks[i], false, scsi.IndividualVMAccess(u.ID()), mc) if err != nil { - return err - } - if reAdd && scsiMount.UVMPath != uvmPath { - return fmt.Errorf("expecting existing path to be %s but it is %s", uvmPath, scsiMount.UVMPath) + return nil, err } + mounts = append(mounts, scsiMount) } - return nil + return mounts, nil } -func testRemoveAllSCSI(u *uvm.UtilityVM, disks []string) error { - for i := range disks { - if err := u.RemoveSCSI(context.Background(), disks[i]); err != nil { +func testRemoveAllSCSI(mounts []*scsi.Mount) error { + for _, m := range mounts { + if err := m.Release(context.Background()); err != nil { return err } } @@ -103,30 +102,28 @@ func testSCSIAddRemoveSingle(t *testing.T, u *uvm.UtilityVM, pathPrefix string, } // Add each of the disks to the utility VM. Attach-only, no container path - useUvmPathPrefix := false logrus.Debugln("First - adding in attach-only") - err := testAddSCSI(u, disks, pathPrefix, useUvmPathPrefix, false) + mounts, err := testAddSCSI(u, disks, true) if err != nil { t.Fatalf("failed to add SCSI device: %v", err) } // Remove them all logrus.Debugln("Removing them all") - err = testRemoveAllSCSI(u, disks) + err = testRemoveAllSCSI(mounts) if err != nil { t.Fatalf("failed to remove SCSI disk: %v", err) } // Now re-add but providing a container path - useUvmPathPrefix = true logrus.Debugln("Next - re-adding with a container path") - err = testAddSCSI(u, disks, pathPrefix, useUvmPathPrefix, false) + mounts, err = testAddSCSI(u, disks, true) if err != nil { t.Fatalf("failed to add SCSI device: %v", err) } logrus.Debugln("Next - Removing them") - err = testRemoveAllSCSI(u, disks) + err = testRemoveAllSCSI(mounts) if err != nil { t.Fatalf("failed to remove SCSI disk: %v", err) } @@ -154,9 +151,8 @@ func testSCSIAddRemoveMultiple(t *testing.T, u *uvm.UtilityVM, pathPrefix string } // Add each of the disks to the utility VM. Attach-only, no container path - useUvmPathPrefix := false logrus.Debugln("First - adding in attach-only") - err := testAddSCSI(u, disks, pathPrefix, useUvmPathPrefix, false) + mounts1, err := testAddSCSI(u, disks, true) if err != nil { t.Fatalf("failed to add SCSI device: %v", err) } @@ -164,7 +160,7 @@ func testSCSIAddRemoveMultiple(t *testing.T, u *uvm.UtilityVM, pathPrefix string // Try to re-add. 
 	// We only support re-adding the same scsi device for lcow right now
 	logrus.Debugln("Next - trying to re-add")
-	err = testAddSCSI(u, disks, pathPrefix, useUvmPathPrefix, true)
+	mounts2, err := testAddSCSI(u, disks, true)
 	if err != nil {
 		t.Fatalf("failed to re-add SCSI device: %v", err)
 	}
 
 	// Remove them all
 	logrus.Debugln("Removing them all")
 	// first removal decrements ref count
-	err = testRemoveAllSCSI(u, disks)
+	err = testRemoveAllSCSI(mounts1)
 	if err != nil {
 		t.Fatalf("failed to remove SCSI disk: %v", err)
 	}
 	// second removal actually removes the device
-	err = testRemoveAllSCSI(u, disks)
+	err = testRemoveAllSCSI(mounts2)
 	if err != nil {
 		t.Fatalf("failed to remove SCSI disk: %v", err)
 	}
 
 	// Now re-add but providing a container path
 	logrus.Debugln("Next - re-adding with a container path")
-	useUvmPathPrefix = true
-	err = testAddSCSI(u, disks, pathPrefix, useUvmPathPrefix, false)
+	mounts1, err = testAddSCSI(u, disks, false)
 	if err != nil {
 		t.Fatalf("failed to add SCSI device: %v", err)
 	}
 
 	// Try to re-add
 	logrus.Debugln("Next - trying to re-add")
-	err = testAddSCSI(u, disks, pathPrefix, useUvmPathPrefix, true)
+	mounts2, err = testAddSCSI(u, disks, false)
 	if err != nil {
 		t.Fatalf("failed to add SCSI device: %v", err)
 	}
 
 	logrus.Debugln("Next - Removing them")
 	// first removal decrements ref count
-	err = testRemoveAllSCSI(u, disks)
+	err = testRemoveAllSCSI(mounts1)
 	if err != nil {
 		t.Fatalf("failed to remove SCSI disk: %v", err)
 	}
 	// second removal actually removes the device
-	err = testRemoveAllSCSI(u, disks)
+	err = testRemoveAllSCSI(mounts2)
 	if err != nil {
 		t.Fatalf("failed to remove SCSI disk: %v", err)
 	}
@@ -281,29 +276,28 @@ func TestParallelScsiOps(t *testing.T) {
 				continue
 			}
 
-			var options []string
-			_, err = u.AddSCSI(context.Background(), path, "", false, false, options, uvm.VMAccessTypeIndividual)
+			mount, err := u.SCSIManager.AddVirtualDisk(context.Background(), path, false, scsi.IndividualVMAccess(u.ID()), nil)
 			if err != nil {
 				os.Remove(path)
-				t.Errorf("failed to AddSCSI for worker: %d, iteration: %d with err: %v", scsiIndex, iteration, err)
+				t.Errorf("failed to add SCSI disk for worker: %d, iteration: %d with err: %v", scsiIndex, iteration, err)
 				continue
 			}
-			err = u.RemoveSCSI(context.Background(), path)
+			err = mount.Release(context.Background())
 			if err != nil {
-				t.Errorf("failed to RemoveSCSI for worker: %d, iteration: %d with err: %v", scsiIndex, iteration, err)
+				t.Errorf("failed to remove SCSI disk for worker: %d, iteration: %d with err: %v", scsiIndex, iteration, err)
 				// This worker cant continue because the index is dead.
We have to stop
 				break
 			}
 
-			_, err = u.AddSCSI(context.Background(), path, fmt.Sprintf("/run/gcs/c/0/scsi/%d", iteration), false, false, options, uvm.VMAccessTypeIndividual)
+			mount, err = u.SCSIManager.AddVirtualDisk(context.Background(), path, false, scsi.IndividualVMAccess(u.ID()), &scsi.MountConfig{})
 			if err != nil {
 				os.Remove(path)
-				t.Errorf("failed to AddSCSI for worker: %d, iteration: %d with err: %v", scsiIndex, iteration, err)
+				t.Errorf("failed to add SCSI disk for worker: %d, iteration: %d with err: %v", scsiIndex, iteration, err)
 				continue
 			}
-			err = u.RemoveSCSI(context.Background(), path)
+			err = mount.Release(context.Background())
 			if err != nil {
-				t.Errorf("failed to RemoveSCSI for worker: %d, iteration: %d with err: %v", scsiIndex, iteration, err)
+				t.Errorf("failed to remove SCSI disk for worker: %d, iteration: %d with err: %v", scsiIndex, iteration, err)
 				// This worker cant continue because the index is dead. We have to stop
 				break
 			}
diff --git a/test/functional/wcow_test.go b/test/functional/wcow_test.go
index 84b118dca5..3061fdffa8 100644
--- a/test/functional/wcow_test.go
+++ b/test/functional/wcow_test.go
@@ -382,7 +382,7 @@ func TestWCOWArgonShim(t *testing.T) {
 	id := "argon"
 	// This is a cheat but stops us re-writing exactly the same code just for test
-	argonShimLocalMountPath, closer, err := layerspkg.MountWCOWLayers(context.Background(), id, append(imageLayers, argonShimScratchDir), "", "", nil)
+	argonShimLocalMountPath, closer, err := layerspkg.MountWCOWLayers(context.Background(), id, append(imageLayers, argonShimScratchDir), "", nil)
 	if err != nil {
 		t.Fatal(err)
 	}
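
Resulting caller pattern, for reference (a sketch only; `vm`, `ctx`, and `vhdPath` are placeholders, with the calls taken from the hunks above):

	// Attach a VHD and mount it in the guest, then release it when done.
	mount, err := vm.SCSIManager.AddVirtualDisk(ctx, vhdPath, false, scsi.IndividualVMAccess(vm.ID()), &scsi.MountConfig{})
	if err != nil {
		return err
	}
	defer func() {
		_ = mount.Release(ctx) // unmounts and detaches once refcounts reach 0
	}()
	guestPath := mount.GuestPath() // where the disk is mounted inside the UVM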