Skip to content

Commit

Permalink
Add support for dissect.btrfs (fox-it#370)
Browse files Browse the repository at this point in the history
  • Loading branch information
Schamper authored and Zawadidone committed Apr 5, 2024
1 parent 60f73e0 commit 4149697
Show file tree
Hide file tree
Showing 9 changed files with 347 additions and 71 deletions.
94 changes: 87 additions & 7 deletions dissect/target/filesystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,18 +45,20 @@ class Filesystem:
# This has the added benefit of having a readily available "pretty name" for each implementation
__type__: str = None
"""A short string identifying the type of filesystem."""
__multi_volume__: bool = False
"""Whether this filesystem supports multiple volumes (disks)."""

def __init__(
self,
volume: Optional[BinaryIO] = None,
volume: Optional[Union[BinaryIO, list[BinaryIO]]] = None,
alt_separator: str = "",
case_sensitive: bool = True,
) -> None:
"""The base initializer for the class.
Args:
volume: A volume or other file-like object associated with the filesystem.
case_sensitive: Defines if the paths in the Filesystem are case sensitive or not.
case_sensitive: Defines if the paths in the filesystem are case sensitive or not.
alt_separator: The alternative separator used to distinguish between directories in a path.
Raises:
Expand All @@ -82,7 +84,7 @@ def __fstype__(cls) -> str:
return cls.__type__

def path(self, *args) -> fsutil.TargetPath:
    """Create a path-like object bound to this filesystem.

    Args:
        *args: Path components, forwarded unchanged to ``fsutil.TargetPath``.

    Returns:
        A ``fsutil.TargetPath`` constructed with this filesystem and ``args``.
    """
    target_path = fsutil.TargetPath(self, *args)
    return target_path

@classmethod
Expand Down Expand Up @@ -125,6 +127,52 @@ def _detect(fh: BinaryIO) -> bool:
"""
raise NotImplementedError()

@classmethod
def detect_id(cls, fh: BinaryIO) -> Optional[bytes]:
    """Return the identifier of the filesystem set that ``fh`` belongs to.

    Only meaningful for filesystems that support multiple volumes (disks), where it is
    used to find all volumes belonging to a single filesystem.

    The stream is rewound to ``0`` before ``_detect_id`` is called and its original
    position is restored afterwards, whether detection succeeded or not.

    Args:
        fh: A file-like object, usually a disk or partition.

    Returns:
        The identifier reported by ``_detect_id``, or ``None`` when this filesystem is not
        multi-volume or when detection failed (the failure is logged).

    Raises:
        NotImplementedError: If the subclass does not implement ``_detect_id``.
    """
    identifier = None

    if cls.__multi_volume__:
        start = fh.tell()
        try:
            fh.seek(0)
            identifier = cls._detect_id(fh)
        except NotImplementedError:
            # A missing implementation is a programming error, not a detection failure
            raise
        except Exception as e:
            log.warning("Failed to detect ID on %s filesystem", cls.__fstype__)
            log.debug("", exc_info=e)
        finally:
            fh.seek(start)

    return identifier

@staticmethod
def _detect_id(fh: BinaryIO) -> Optional[bytes]:
"""Return a filesystem set identifier.
This method should be implemented by subclasses of filesystems that support multiple volumes (disks).
The position of ``fh`` is guaranteed to be ``0``.
Args:
fh: A file-like object, usually a disk or partition.
Returns:
An identifier that can be used to combine the given ``fh`` with others beloning to the same set.
"""
raise NotImplementedError()

def iter_subfs(self) -> Iterator[Filesystem]:
    """Yield possible sub-filesystems.

    This base implementation yields nothing; it is still a generator so callers can
    always iterate over the result.
    """
    yield from iter(())

def get(self, path: str) -> FilesystemEntry:
"""Retrieve a :class:`FilesystemEntry` from the filesystem.
Expand Down Expand Up @@ -1461,6 +1509,18 @@ def register(module: str, class_name: str, internal: bool = True) -> None:
FILESYSTEMS.append(getattr(import_lazy(module), class_name))


def is_multi_volume_filesystem(fh: BinaryIO) -> bool:
    """Check whether ``fh`` is detected by any registered multi-volume filesystem.

    Args:
        fh: A file-like object, usually a disk or partition.

    Returns:
        ``True`` as soon as a filesystem with ``__multi_volume__`` set detects ``fh``,
        ``False`` if none does.
    """
    for fs_cls in FILESYSTEMS:
        try:
            # Registered filesystems are imported lazily, so even reading an attribute can
            # raise ImportError; keep every access inside the try block.
            if not fs_cls.__multi_volume__:
                continue
            if fs_cls.detect(fh):
                return True
        except ImportError as e:
            log.info("Failed to import %s", fs_cls)
            log.debug("", exc_info=e)

    return False


def open(fh: BinaryIO, *args, **kwargs) -> Filesystem:
offset = fh.tell()
fh.seek(0)
Expand All @@ -1469,10 +1529,7 @@ def open(fh: BinaryIO, *args, **kwargs) -> Filesystem:
for filesystem in FILESYSTEMS:
try:
if filesystem.detect(fh):
instance = filesystem(fh, *args, **kwargs)
instance.volume = fh

return instance
return filesystem(fh, *args, **kwargs)
except ImportError as e:
log.info("Failed to import %s", filesystem)
log.debug("", exc_info=e)
Expand All @@ -1482,12 +1539,35 @@ def open(fh: BinaryIO, *args, **kwargs) -> Filesystem:
raise FilesystemError(f"Failed to open filesystem for {fh}")


def open_multi_volume(fhs: list[BinaryIO], *args, **kwargs) -> Iterator[Filesystem]:
    """Open every multi-volume filesystem that can be assembled from ``fhs``.

    For each registered filesystem that has ``__multi_volume__`` set, the volumes it detects
    are grouped by the identifier returned from ``detect_id`` and one filesystem instance is
    opened per group.

    This is a generator: nothing is detected or opened until the result is iterated.

    Args:
        fhs: The file-like objects (usually disks or partitions) to combine.
        *args: Extra positional arguments passed to the filesystem constructor.
        **kwargs: Extra keyword arguments passed to the filesystem constructor.

    Yields:
        One filesystem instance per group of volumes sharing an identifier.
    """
    for filesystem in FILESYSTEMS:
        try:
            if not filesystem.__multi_volume__:
                continue

            # NOTE(review): detect_id returns None when ID detection fails, so such volumes
            # all end up in a single group keyed by None.
            volumes = defaultdict(list)
            for fh in fhs:
                if filesystem.detect(fh):
                    volumes[filesystem.detect_id(fh)].append(fh)

            for vols in volumes.values():
                yield filesystem(vols, *args, **kwargs)

        except ImportError as e:
            log.info("Failed to import %s", filesystem)
            log.debug("", exc_info=e)


register("ntfs", "NtfsFilesystem")
register("extfs", "ExtFilesystem")
register("xfs", "XfsFilesystem")
register("fat", "FatFilesystem")
register("ffs", "FfsFilesystem")
register("vmfs", "VmfsFilesystem")
register("btrfs", "BtrfsFilesystem")
register("exfat", "ExfatFilesystem")
register("squashfs", "SquashFSFilesystem")
register("zip", "ZipFilesystem")
Expand Down
180 changes: 180 additions & 0 deletions dissect/target/filesystems/btrfs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
from __future__ import annotations

from typing import BinaryIO, Iterator, Optional, Union

import dissect.btrfs as btrfs
from dissect.btrfs.c_btrfs import c_btrfs

from dissect.target.exceptions import (
FileNotFoundError,
FilesystemError,
IsADirectoryError,
NotADirectoryError,
NotASymlinkError,
)
from dissect.target.filesystem import Filesystem, FilesystemEntry
from dissect.target.helpers import fsutil


class BtrfsFilesystem(Filesystem):
    """Filesystem implementation for Btrfs, backed by ``dissect.btrfs``.

    A Btrfs filesystem can span multiple volumes, so ``fh`` may be a single file-like
    object or a list of them. Path lookups go to the subvolume opened by calling
    ``open_subvolume()`` without arguments; other subvolumes are exposed through
    ``iter_subfs``.
    """

    __fstype__ = "btrfs"
    __multi_volume__ = True

    def __init__(self, fh: Union[BinaryIO, list[BinaryIO]], *args, **kwargs):
        """Open the Btrfs filesystem on ``fh`` and its default subvolume.

        Args:
            fh: A volume, or a list of volumes, that make up the filesystem.
            *args: Passed on to the base ``Filesystem`` initializer.
            **kwargs: Passed on to the base ``Filesystem`` initializer.
        """
        super().__init__(fh, *args, **kwargs)
        self.btrfs = btrfs.Btrfs(fh)

        default_fs = self.open_subvolume()
        self.subfs = default_fs
        self.subvolume = default_fs.subvolume

    @staticmethod
    def _detect(fh: BinaryIO) -> bool:
        """Return whether ``fh`` has the Btrfs magic in its superblock."""
        fh.seek(c_btrfs.BTRFS_SUPER_INFO_OFFSET)
        superblock = fh.read(4096)

        # The magic is read as a little-endian integer from bytes 64..72 of the superblock
        return int.from_bytes(superblock[64:72], "little") == c_btrfs.BTRFS_MAGIC

    @staticmethod
    def _detect_id(fh: BinaryIO) -> Optional[bytes]:
        """Return the fsid bytes from the superblock, used to group volumes."""
        # First field is csum, followed by fsid
        fsid_offset = c_btrfs.BTRFS_SUPER_INFO_OFFSET + c_btrfs.BTRFS_CSUM_SIZE
        fh.seek(fsid_offset)
        return fh.read(c_btrfs.BTRFS_FSID_SIZE)

    def iter_subfs(self) -> Iterator[BtrfsSubvolumeFilesystem]:
        """Yield a filesystem for every subvolume except the one already opened as ``subfs``."""
        for subvol in self.btrfs.subvolumes():
            # The default volume is already opened by the main filesystem, so leave it out
            if subvol.objectid != self.subfs.subvolume.objectid:
                yield self.open_subvolume(subvolid=subvol.objectid)

    def open_subvolume(self, subvol: Optional[str] = None, subvolid: Optional[int] = None) -> BtrfsSubvolumeFilesystem:
        """Open a subvolume of this filesystem.

        Args:
            subvol: The subvolume to look up, passed to ``BtrfsSubvolumeFilesystem``.
            subvolid: The object ID of the subvolume, passed to ``BtrfsSubvolumeFilesystem``.

        Returns:
            A :class:`BtrfsSubvolumeFilesystem` for the requested subvolume.
        """
        return BtrfsSubvolumeFilesystem(self, subvol, subvolid)

    def get(self, path: str) -> FilesystemEntry:
        """Retrieve the entry at ``path`` from the subvolume opened as ``subfs``."""
        default_fs = self.subfs
        return default_fs.get(path)


class BtrfsSubvolumeFilesystem(Filesystem):
    """A single subvolume of a :class:`BtrfsFilesystem`, usable as a filesystem of its own."""

    __fstype__ = "btrfs"

    def __init__(self, fs: BtrfsFilesystem, subvol: Optional[str] = None, subvolid: Optional[int] = None):
        """Select a subvolume of ``fs``.

        The volume, alternative separator and case sensitivity are taken over from ``fs``.

        Args:
            fs: The Btrfs filesystem the subvolume belongs to.
            subvol: Subvolume to look up with ``find_subvolume``.
            subvolid: Object ID of the subvolume to open with ``open_subvolume``.

        Raises:
            ValueError: If both ``subvol`` and ``subvolid`` are given.
        """
        super().__init__(fs.volume, alt_separator=fs.alt_separator, case_sensitive=fs.case_sensitive)
        if subvol is not None and subvolid is not None:
            raise ValueError("Only one of subvol or subvolid is allowed")

        self.fs = fs
        self.btrfs = fs.btrfs

        # Selection is by truthiness: an empty ``subvol`` or a ``subvolid`` of 0 falls through
        # to the default subvolume.
        if subvol:
            selected = self.btrfs.find_subvolume(subvol)
        elif subvolid:
            selected = self.btrfs.open_subvolume(subvolid)
        else:
            selected = self.btrfs.default_subvolume
        self.subvolume = selected

    def get(self, path: str) -> FilesystemEntry:
        """Retrieve the entry at ``path`` within this subvolume."""
        node = self._get_node(path)
        return BtrfsFilesystemEntry(self, path, node)

    def _get_node(self, path: str, node: Optional[btrfs.INode] = None) -> btrfs.INode:
        """Look up ``path`` in the subvolume, translating ``dissect.btrfs`` errors.

        Args:
            path: The path to look up.
            node: Optional inode handed to the subvolume's ``get`` together with ``path``.

        Raises:
            FileNotFoundError: If the path does not exist, or on any other ``btrfs.Error``.
            NotADirectoryError: If ``dissect.btrfs`` raised its ``NotADirectoryError``.
            NotASymlinkError: If ``dissect.btrfs`` raised its ``NotASymlinkError``.
        """
        try:
            return self.subvolume.get(path, node)
        except btrfs.FileNotFoundError as err:
            raise FileNotFoundError(path, cause=err)
        except btrfs.NotADirectoryError as err:
            raise NotADirectoryError(path, cause=err)
        except btrfs.NotASymlinkError as err:
            raise NotASymlinkError(path, cause=err)
        except btrfs.Error as err:
            # Any remaining library error is reported as a missing file
            raise FileNotFoundError(path, cause=err)


class BtrfsFilesystemEntry(FilesystemEntry):
    """A file, directory or symlink on a Btrfs subvolume, backed by a ``btrfs.INode``."""

    # Entries are created with a BtrfsSubvolumeFilesystem as ``fs``: that is the class that
    # constructs them and that provides the ``_get_node`` used by ``get``.
    fs: BtrfsSubvolumeFilesystem
    entry: btrfs.INode

    def get(self, path: str) -> FilesystemEntry:
        """Retrieve the entry at ``path``, looked up starting from this entry's inode."""
        entry_path = fsutil.join(self.path, path, alt_separator=self.fs.alt_separator)
        entry = self.fs._get_node(path, self.entry)
        return BtrfsFilesystemEntry(self.fs, entry_path, entry)

    def open(self) -> BinaryIO:
        """Open this entry for reading, following symlinks.

        Raises:
            IsADirectoryError: If the entry is a directory.
        """
        if self.is_dir():
            raise IsADirectoryError(self.path)
        return self._resolve().entry.open()

    def _iterdir(self) -> Iterator[tuple[str, btrfs.INode]]:
        """Yield ``(name, inode)`` pairs for the children of this directory.

        Raises:
            NotADirectoryError: If the entry is not a directory.
        """
        if not self.is_dir():
            raise NotADirectoryError(self.path)

        # List a symlinked directory through the inode it resolves to, so that every item is
        # a (name, inode) pair; iterdir() and scandir() unpack each item as such.
        node = self._resolve(follow_symlinks=True).entry
        for name, child in node.iterdir():
            if name in (".", ".."):
                continue

            yield name, child

    def iterdir(self) -> Iterator[str]:
        """Yield the names of the children of this directory."""
        for name, _ in self._iterdir():
            yield name

    def scandir(self) -> Iterator[FilesystemEntry]:
        """Yield a :class:`BtrfsFilesystemEntry` for every child of this directory."""
        for name, entry in self._iterdir():
            entry_path = fsutil.join(self.path, name, alt_separator=self.fs.alt_separator)
            yield BtrfsFilesystemEntry(self.fs, entry_path, entry)

    def is_dir(self, follow_symlinks: bool = True) -> bool:
        """Return whether this entry is a directory; ``False`` if it cannot be resolved."""
        try:
            return self._resolve(follow_symlinks=follow_symlinks).entry.is_dir()
        except FilesystemError:
            return False

    def is_file(self, follow_symlinks: bool = True) -> bool:
        """Return whether this entry is a file; ``False`` if it cannot be resolved."""
        try:
            return self._resolve(follow_symlinks=follow_symlinks).entry.is_file()
        except FilesystemError:
            return False

    def is_symlink(self) -> bool:
        """Return whether this entry itself is a symlink."""
        return self.entry.is_symlink()

    def readlink(self) -> str:
        """Return the link target of this symlink.

        Raises:
            NotASymlinkError: If the entry is not a symlink.
        """
        if not self.is_symlink():
            raise NotASymlinkError()

        return self.entry.link

    def stat(self, follow_symlinks: bool = True) -> fsutil.stat_result:
        """Return the stat information of this entry, optionally following symlinks."""
        return self._resolve(follow_symlinks=follow_symlinks).lstat()

    def lstat(self) -> fsutil.stat_result:
        """Return the stat information of this entry itself, without following symlinks."""
        entry = self.entry
        node = self.entry.inode

        # mode, ino, dev, nlink, uid, gid, size, atime, mtime, ctime
        st_info = fsutil.stat_result(
            [
                entry.mode,
                entry.inum,
                0,
                node.nlink,
                entry.uid,
                entry.gid,
                entry.size,
                # timestamp() returns a float which will fill both the integer and float fields
                entry.atime.timestamp(),
                entry.mtime.timestamp(),
                entry.ctime.timestamp(),
            ]
        )

        # Set the nanosecond resolution separately
        st_info.st_atime_ns = entry.atime_ns
        st_info.st_mtime_ns = entry.mtime_ns
        st_info.st_ctime_ns = entry.ctime_ns

        # Btrfs has a birth time, called otime
        st_info.st_birthtime = entry.otime.timestamp()

        return st_info
17 changes: 14 additions & 3 deletions dissect/target/helpers/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from datetime import datetime, timezone, tzinfo
from enum import Enum
from pathlib import Path
from typing import BinaryIO, Iterator, Union
from typing import BinaryIO, Callable, Iterator, Optional, Union

from dissect.util.ts import from_unix

Expand All @@ -28,22 +28,33 @@ class StrEnum(str, Enum):
"""Sortable and serializible string-based enum"""


def list_to_frozen_set(function: Callable) -> Callable:
    """Decorate ``function`` so that list arguments are passed to it as frozensets.

    Every positional or keyword argument that is a ``list`` is replaced by a ``frozenset``
    of its items; all other arguments are passed through unchanged.

    Args:
        function: The callable to wrap.

    Returns:
        A wrapper that carries the metadata (name, docstring, ...) of ``function``.
    """
    # Imported locally so this helper does not depend on the module's import block
    import functools

    def _freeze(value):
        return frozenset(value) if isinstance(value, list) else value

    @functools.wraps(function)
    def wrapper(*args, **kwargs):
        frozen_args = [_freeze(arg) for arg in args]
        frozen_kwargs = {key: _freeze(value) for key, value in kwargs.items()}
        return function(*frozen_args, **frozen_kwargs)

    return wrapper


def parse_path_uri(
    path: Optional[Union[str, Path]],
) -> tuple[Optional[str], Optional[str], Optional[dict[str, list[str]]]]:
    """Split a path or URI into its scheme, path and parsed query.

    Args:
        path: The path or URI to parse; it is converted with ``str()`` first.

    Returns:
        A ``(scheme, path, query)`` tuple, where ``query`` maps every query parameter to the
        list of its values (blank values are kept). All three are ``None`` if ``path`` is
        ``None``.
    """
    if path is None:
        return None, None, None

    parsed_path = urllib.parse.urlparse(str(path))
    parsed_query = urllib.parse.parse_qs(parsed_path.query, keep_blank_values=True)
    return parsed_path.scheme, parsed_path.path, parsed_query


def parse_options_string(options: str) -> dict[str, Union[str, bool]]:
    """Parse a comma-separated options string into a dictionary.

    Options of the form ``key=value`` map ``key`` to ``value`` (split on the first ``=``);
    options without a ``=`` map to ``True``.

    Args:
        options: The options string, e.g. ``"rw,subvol=/@"``.

    Returns:
        A dictionary of the parsed options; empty for an empty string.
    """
    result = {}
    for opt in options.split(","):
        if not opt:
            # An empty string, or a leading, trailing or doubled comma, carries no option
            continue

        key, separator, value = opt.partition("=")
        result[key] = value if separator else True
    return result


SLUG_RE = re.compile(r"[/\\ ]")


Expand Down
Loading

0 comments on commit 4149697

Please sign in to comment.