From cbc02614cdd3f467777e8737dc04cc47d6ff3eb4 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 31 Mar 2020 23:23:17 -0700 Subject: [PATCH] Allow fscrypt to work in containers Update the /proc/self/mountinfo parsing code to allow selecting a Mount with Subtree != "/", i.e. a Mount not of the full filesystem. This is needed to allow fscrypt to work in containers, where the root of the filesystem may not be mounted. See findMainMount() for details about the algorithm used. Resolves https://github.com/google/fscrypt/issues/211 --- filesystem/filesystem.go | 7 +- filesystem/mountpoint.go | 177 ++++++++++++++++++++++++++++------ filesystem/mountpoint_test.go | 136 ++++++++++++++++++++++++-- 3 files changed, 278 insertions(+), 42 deletions(-) diff --git a/filesystem/filesystem.go b/filesystem/filesystem.go index e0ef1108..ecdeae1f 100644 --- a/filesystem/filesystem.go +++ b/filesystem/filesystem.go @@ -70,8 +70,9 @@ var ( // DeviceNumber - Device number of the filesystem. This is set even if // Device isn't, since all filesystems have a device // number assigned by the kernel, even pseudo-filesystems. -// BindMnt - True if this mount is not for the full filesystem but -// rather is only for a subtree. +// Subtree - The mounted subtree of the filesystem. This is usually +// "/", meaning that the entire filesystem is mounted, but +// it can differ for bind mounts. // ReadOnly - True if this is a read-only mount // // In order to use a Mount to store fscrypt metadata, some directories must be @@ -99,7 +100,7 @@ type Mount struct { FilesystemType string Device string DeviceNumber DeviceNumber - BindMnt bool + Subtree string ReadOnly bool } diff --git a/filesystem/mountpoint.go b/filesystem/mountpoint.go index edbdc851..acddbae7 100644 --- a/filesystem/mountpoint.go +++ b/filesystem/mountpoint.go @@ -39,6 +39,11 @@ import ( var ( // This map holds data about the state of the system's filesystems. + // + // It only contains one Mount per filesystem, even if there are + // additional bind mounts, since we want to store fscrypt metadata in + // only one place per filesystem. If it is ambiguous which Mount should + // be used, an explicit nil entry is stored. mountsByDevice map[DeviceNumber]*Mount // Used to make the mount functions thread safe mountMutex sync.Mutex @@ -122,7 +127,7 @@ func parseMountInfoLine(line string) *Mount { if err != nil { return nil } - mnt.BindMnt = unescapeString(fields[3]) != "/" + mnt.Subtree = unescapeString(fields[3]) mnt.Path = unescapeString(fields[4]) for _, opt := range strings.Split(fields[5], ",") { if opt == "ro" { @@ -134,6 +139,124 @@ func parseMountInfoLine(line string) *Mount { return mnt } +type mountpointTreeNode struct { + mount *Mount + parent *mountpointTreeNode + children []*mountpointTreeNode +} + +func addUncontainedSubtreesRecursive(dst map[string]bool, + node *mountpointTreeNode, allUncontainedSubtrees map[string]bool) { + if allUncontainedSubtrees[node.mount.Subtree] { + dst[node.mount.Subtree] = true + } + for _, child := range node.children { + addUncontainedSubtreesRecursive(dst, child, allUncontainedSubtrees) + } +} + +// findMainMount finds the "main" Mount of a filesystem. The "main" Mount is +// where the filesystem's fscrypt metadata is stored. +// +// Normally, there is just one Mount and it's of the entire filesystem +// (mnt.Subtree == "/"). But in general, the filesystem might be mounted in +// multiple places, including "bind mounts" where mnt.Subtree != "/". Also, the +// filesystem might have a combination of read-write and read-only mounts. +// +// To handle most cases, we could just choose a mount with mnt.Subtree == "/", +// preferably a read-write mount. However, that doesn't work in containers +// where the "/" subtree might not be mounted. Here's a real-world example: +// +// mnt.Subtree mnt.Path +// ----------- -------- +// /var/lib/lxc/base/rootfs / +// /var/cache/pacman/pkg /var/cache/pacman/pkg +// /srv/repo/x86_64 /srv/http/x86_64 +// +// In this case, all mnt.Subtree are independent. To handle this case, we must +// choose the Mount whose mnt.Path contains the others, i.e. the first one. +// Note: the fscrypt metadata won't be usable from outside the container since +// it won't be at the real root of the filesystem, but that may be acceptable. +// +// However, we can't look *only* at mnt.Path, since in some cases mnt.Subtree is +// needed to correctly handle bind mounts. For example, in the following case, +// the first Mount should be chosen: +// +// mnt.Subtree mnt.Path +// ----------- -------- +// /foo /foo +// /foo/dir /dir +// +// To solve this, we divide the mounts into non-overlapping trees of mnt.Path. +// Then, we choose one of these trees which contains (exactly or via path +// prefix) *all* mnt.Subtree. We then return the root of this tree. In both +// the above examples, this algorithm returns the first Mount. +func findMainMount(filesystemMounts []*Mount) *Mount { + // Index this filesystem's mounts by path. Note: paths are unique here, + // since non-last mounts were already excluded earlier. + // + // Also build the set of all mounted subtrees. + mountsByPath := make(map[string]*mountpointTreeNode) + allSubtrees := make(map[string]bool) + for _, mnt := range filesystemMounts { + mountsByPath[mnt.Path] = &mountpointTreeNode{mount: mnt} + allSubtrees[mnt.Subtree] = true + } + + // Divide the mounts into non-overlapping trees of mountpoints. + for path, mntNode := range mountsByPath { + for path != "/" && mntNode.parent == nil { + path = filepath.Dir(path) + if parent := mountsByPath[path]; parent != nil { + mntNode.parent = parent + parent.children = append(parent.children, mntNode) + } + } + } + + // Build the set of mounted subtrees that aren't contained in any other + // mounted subtree. + allUncontainedSubtrees := make(map[string]bool) + for subtree := range allSubtrees { + contained := false + for t := subtree; t != "/" && !contained; { + t = filepath.Dir(t) + contained = allSubtrees[t] + } + if !contained { + allUncontainedSubtrees[subtree] = true + } + } + + // Select the root of a mountpoint tree whose mounted subtrees contain + // *all* mounted subtrees. Equivalently, select a mountpoint tree in + // which every uncontained subtree is mounted. + var mainMount *Mount + for _, mntNode := range mountsByPath { + mnt := mntNode.mount + if mntNode.parent != nil { + continue + } + uncontainedSubtrees := make(map[string]bool) + addUncontainedSubtreesRecursive(uncontainedSubtrees, mntNode, allUncontainedSubtrees) + if len(uncontainedSubtrees) != len(allUncontainedSubtrees) { + continue + } + // If there's more than one eligible mount, they should have the + // same Subtree. Otherwise it's ambiguous which one to use. + if mainMount != nil && mainMount.Subtree != mnt.Subtree { + log.Printf("Unsupported case: %q (%v) has multiple non-overlapping mounts. This filesystem will be ignored!", + mnt.Device, mnt.DeviceNumber) + return nil + } + // Prefer a read-write mount to a read-only one. + if mainMount == nil || mainMount.ReadOnly { + mainMount = mnt + } + } + return mainMount +} + // This is separate from loadMountInfo() only for unit testing. func readMountInfo(r io.Reader) error { mountsByPath := make(map[string]*Mount) @@ -159,23 +282,17 @@ func readMountInfo(r io.Reader) error { // mountpoints are listed in mount order. mountsByPath[mnt.Path] = mnt } - // fscrypt only really cares about the root directory of each - // filesystem, because that's where the fscrypt metadata is stored. So - // keep just one Mount per filesystem, ignoring bind mounts. Store that - // Mount in mountsByDevice so that it can be found later from the device - // number. Also, prefer a read-write mount to a read-only one. - // - // If the filesystem has *only* bind mounts, store an explicit nil entry - // so that we can show a useful error message later. + // For each filesystem, choose a "main" Mount and discard any additional + // bind mounts. fscrypt only cares about the main Mount, since it's + // where the fscrypt metadata is stored. Store all main Mounts in + // mountsByDevice so that they can be found by device number later. + allMountsByDevice := make(map[DeviceNumber][]*Mount) for _, mnt := range mountsByPath { - existingMnt, ok := mountsByDevice[mnt.DeviceNumber] - if mnt.BindMnt { - if !ok { - mountsByDevice[mnt.DeviceNumber] = nil - } - } else if existingMnt == nil || (existingMnt.ReadOnly && !mnt.ReadOnly) { - mountsByDevice[mnt.DeviceNumber] = mnt - } + allMountsByDevice[mnt.DeviceNumber] = + append(allMountsByDevice[mnt.DeviceNumber], mnt) + } + for deviceNumber, filesystemMounts := range allMountsByDevice { + mountsByDevice[deviceNumber] = findMainMount(filesystemMounts) } return nil } @@ -197,13 +314,13 @@ func loadMountInfo() error { return nil } -func filesystemRootDirNotVisibleError(deviceNumber DeviceNumber) error { - return errors.Errorf("root of filesystem on device %q (%v) is not visible in the current mount namespace", +func filesystemLacksMainMountError(deviceNumber DeviceNumber) error { + return errors.Errorf("Device %q (%v) lacks a \"main\" mountpoint in the current mount namespace, so it's ambiguous where to store the fscrypt metadata.", getDeviceName(deviceNumber), deviceNumber) } -// AllFilesystems lists all non-bind Mounts on the current system ordered by -// path. Use CheckSetup() to see if they are used with fscrypt. +// AllFilesystems lists all mounted filesystems ordered by path to their "main" +// Mount. Use CheckSetup() to see if they are set up for use with fscrypt. func AllFilesystems() ([]*Mount, error) { mountMutex.Lock() defer mountMutex.Unlock() @@ -250,24 +367,24 @@ func FindMount(path string) (*Mount, error) { return nil, errors.Errorf("couldn't find mountpoint containing %q", path) } if mnt == nil { - return nil, filesystemRootDirNotVisibleError(deviceNumber) + return nil, filesystemLacksMainMountError(deviceNumber) } return mnt, nil } // GetMount is like FindMount, except GetMount also returns an error if the path -// isn't the root directory of a filesystem. For example, if a filesystem is -// mounted at "/mnt" and the file "/mnt/a" exists, FindMount("/mnt/a") will -// succeed whereas GetMount("/mnt/a") will fail. +// doesn't name the same file as the filesystem's "main" Mount. For example, if +// a filesystem is fully mounted at "/mnt" and if "/mnt/a" exists, then +// FindMount("/mnt/a") will succeed whereas GetMount("/mnt/a") will fail. This +// is true even if "/mnt/a" is a bind mount of part of the same filesystem. func GetMount(mountpoint string) (*Mount, error) { mnt, err := FindMount(mountpoint) if err != nil { return nil, errors.Wrap(ErrNotAMountpoint, mountpoint) } - // Check whether 'mountpoint' is the root directory of the filesystem, - // i.e. is the same directory as 'mnt.Path'. Use os.SameFile() (i.e., - // compare inode numbers) rather than compare canonical paths, since the - // filesystem might be fully mounted in multiple places. + // Check whether 'mountpoint' names the same directory as 'mnt.Path'. + // Use os.SameFile() (i.e., compare inode numbers) rather than compare + // canonical paths, since filesystems may be mounted in multiple places. fi1, err := os.Stat(mountpoint) if err != nil { return nil, err @@ -323,7 +440,7 @@ func getMountFromLink(link string) (*Mount, error) { getDeviceName(deviceNumber), deviceNumber) } if mnt == nil { - return nil, filesystemRootDirNotVisibleError(deviceNumber) + return nil, filesystemLacksMainMountError(deviceNumber) } return mnt, nil } diff --git a/filesystem/mountpoint_test.go b/filesystem/mountpoint_test.go index ad0dab77..633ff947 100644 --- a/filesystem/mountpoint_test.go +++ b/filesystem/mountpoint_test.go @@ -84,8 +84,8 @@ func TestLoadMountInfoBasic(t *testing.T) { if mnt.DeviceNumber.String() != "259:3" { t.Error("Wrong device number") } - if mnt.BindMnt { - t.Error("Wrong bind mount flag") + if mnt.Subtree != "/" { + t.Error("Wrong subtree") } if mnt.ReadOnly { t.Error("Wrong readonly flag") @@ -220,8 +220,9 @@ func TestReadWriteMountIsPreferredOverReadOnlyMount(t *testing.T) { } } -// Test that a mount of the full filesystem is preferred over a bind mount. -func TestFullMountIsPreferredOverBindMount(t *testing.T) { +// Test that a mount of the full filesystem is preferred over mounts of non-root +// subtrees, given independent mountpoints. +func TestRootSubtreeIsPreferred(t *testing.T) { mountinfo := ` 222 15 259:3 /subtree1 /home rw shared:1 - ext4 /dev/root rw 222 15 259:3 / /mnt rw shared:1 - ext4 /dev/root rw @@ -231,16 +232,133 @@ func TestFullMountIsPreferredOverBindMount(t *testing.T) { defer endLoadMountInfoTest() loadMountInfoFromString(mountinfo) mnt := mountForDevice("259:3") - if mnt.Path != "/mnt" { + if mnt.Subtree != "/" { + t.Error("Wrong mount was chosen") + } +} + +// Test that a mount that is not of the full filesystem but still contains all +// other mounted subtrees is preferred, given independent mountpoints. +func TestHighestSubtreeIsPreferred(t *testing.T) { + mountinfo := ` +222 15 259:3 /foo/bar /mnt rw shared:1 - ext4 /dev/root rw +222 15 259:3 /foo /tmp rw shared:1 - ext4 /dev/root rw +222 15 259:3 /foo/baz /home rw shared:1 - ext4 /dev/root rw +` + beginLoadMountInfoTest() + defer endLoadMountInfoTest() + loadMountInfoFromString(mountinfo) + deviceNumber, _ := newDeviceNumberFromString("259:3") + mnt := mountsByDevice[deviceNumber] + if mnt.Subtree != "/foo" { + t.Error("Wrong mount was chosen") + } +} + +// Test that mountpoint "/" is preferred, given independent subtrees. +func TestRootMountpointIsPreferred(t *testing.T) { + mountinfo := ` +222 15 259:3 /var/cache/pacman/pkg /mnt rw shared:1 - ext4 /dev/root rw +222 15 259:3 /var/lib/lxc/base/rootfs / rw shared:1 - ext4 /dev/root rw +222 15 259:3 /srv/repo/x86_64 /home rw shared:1 - ext4 /dev/root rw +` + beginLoadMountInfoTest() + defer endLoadMountInfoTest() + loadMountInfoFromString(mountinfo) + deviceNumber, _ := newDeviceNumberFromString("259:3") + mnt := mountsByDevice[deviceNumber] + if mnt.Subtree != "/var/lib/lxc/base/rootfs" { + t.Error("Wrong mount was chosen") + } +} + +// Test that a mountpoint that is not "/" but still contains all other +// mountpoints is preferred, given independent subtrees. +func TestHighestMountpointIsPreferred(t *testing.T) { + tempDir, err := ioutil.TempDir("", "fscrypt") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(tempDir) + tempDir, err = filepath.Abs(tempDir) + if err != nil { + t.Fatal(err) + } + if err := os.MkdirAll(tempDir+"/a/b", 0700); err != nil { + t.Fatal(err) + } + if err := os.Mkdir(tempDir+"/a/c", 0700); err != nil { + t.Fatal(err) + } + mountinfo := fmt.Sprintf(` +222 15 259:3 /0 %s rw shared:1 - ext4 /dev/root rw +222 15 259:3 /1 %s rw shared:1 - ext4 /dev/root rw +222 15 259:3 /2 %s rw shared:1 - ext4 /dev/root rw +`, tempDir+"/a/b", tempDir+"/a", tempDir+"/a/c") + + beginLoadMountInfoTest() + defer endLoadMountInfoTest() + loadMountInfoFromString(mountinfo) + deviceNumber, _ := newDeviceNumberFromString("259:3") + mnt := mountsByDevice[deviceNumber] + if mnt.Subtree != "/1" { + t.Error("Wrong mount was chosen") + } +} + +// Test that if some subtrees are contained in other subtrees, *and* some +// mountpoints are contained in other mountpoints, the chosen Mount is the root +// of a tree of mountpoints whose mounted subtrees contain all mounted subtrees. +func TestLoadContainedSubtreesAndMountpoints(t *testing.T) { + tempDir, err := ioutil.TempDir("", "fscrypt") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(tempDir) + tempDir, err = filepath.Abs(tempDir) + if err != nil { + t.Fatal(err) + } + if err := os.MkdirAll(tempDir+"/a/b", 0700); err != nil { + t.Fatal(err) + } + if err := os.Mkdir(tempDir+"/a/c", 0700); err != nil { + t.Fatal(err) + } + if err := os.Mkdir(tempDir+"/d", 0700); err != nil { + t.Fatal(err) + } + if err := os.Mkdir(tempDir+"/e", 0700); err != nil { + t.Fatal(err) + } + // The first three mounts form a tree of mountpoints. The rest have + // independent mountpoints but have mounted subtrees contained in the + // mounted subtrees of the first mountpoint tree. + mountinfo := fmt.Sprintf(` +222 15 259:3 /0 %s rw shared:1 - ext4 /dev/root rw +222 15 259:3 /1 %s rw shared:1 - ext4 /dev/root rw +222 15 259:3 /2 %s rw shared:1 - ext4 /dev/root rw +222 15 259:3 /1/3 %s rw shared:1 - ext4 /dev/root rw +222 15 259:3 /2/4 %s rw shared:1 - ext4 /dev/root rw +`, tempDir+"/a/b", tempDir+"/a", tempDir+"/a/c", + tempDir+"/d", tempDir+"/e") + + beginLoadMountInfoTest() + defer endLoadMountInfoTest() + loadMountInfoFromString(mountinfo) + deviceNumber, _ := newDeviceNumberFromString("259:3") + mnt := mountsByDevice[deviceNumber] + if mnt.Subtree != "/1" { t.Error("Wrong mount was chosen") } } -// Test that if a filesystem only has bind mounts, a nil mountsByDevice entry is -// created. -func TestLoadOnlyBindMounts(t *testing.T) { +// Test loading mounts with independent subtrees *and* independent mountpoints. +// This case is ambiguous, so an explicit nil entry should be stored. +func TestLoadAmbiguousMounts(t *testing.T) { mountinfo := ` -222 15 259:3 /foo /mnt ro,relatime shared:1 - ext4 /dev/root rw,data=ordered +222 15 259:3 /foo /mnt rw shared:1 - ext4 /dev/root rw +222 15 259:3 /bar /tmp rw shared:1 - ext4 /dev/root rw ` beginLoadMountInfoTest() defer endLoadMountInfoTest()