From 9e43cfcc22f583b0dddb04c7eb69d879361a1c1d Mon Sep 17 00:00:00 2001 From: JSCU-CNI <121175071+JSCU-CNI@users.noreply.github.com> Date: Tue, 1 Oct 2024 15:08:07 +0200 Subject: [PATCH 01/10] improve robustness of journal plugin --- dissect/target/plugins/os/unix/log/journal.py | 72 ++++++++++++------- 1 file changed, 48 insertions(+), 24 deletions(-) diff --git a/dissect/target/plugins/os/unix/log/journal.py b/dissect/target/plugins/os/unix/log/journal.py index 4c9e3b05e..39423ff5a 100644 --- a/dissect/target/plugins/os/unix/log/journal.py +++ b/dissect/target/plugins/os/unix/log/journal.py @@ -1,5 +1,6 @@ from __future__ import annotations +import logging import lzma from typing import BinaryIO, Callable, Iterator @@ -13,6 +14,8 @@ from dissect.target.helpers.record import TargetRecordDescriptor from dissect.target.plugin import Plugin, export +log = logging.getLogger(__name__) + # The events have undocumented fields that are not part of the record JournalRecord = TargetRecordDescriptor( "linux/log/journal", @@ -28,7 +31,7 @@ ("varint", "errno"), ("string", "invocation_id"), ("string", "user_invocation_id"), - ("varint", "syslog_facility"), + ("string", "syslog_facility"), ("string", "syslog_identifier"), ("varint", "syslog_pid"), ("string", "syslog_raw"), @@ -70,7 +73,7 @@ ("path", "udev_devlink"), # Other fields ("string", "journal_hostname"), - ("path", "filepath"), + ("path", "source"), ], ) @@ -259,16 +262,26 @@ def get_optional(value: str, to_type: Callable): """Return the value if True, otherwise return None.""" - return to_type(value) if value else None + + if not value: + return None + + try: + return to_type(value) + + except ValueError as e: + log.error("Unable to cast '%s' to %s", value, to_type) + log.debug("", exc_info=e) + return None class JournalFile: """Parse Systemd Journal file format. References: - - https://github.com/systemd/systemd/blob/206f0f397edf1144c63a158fb30f496c3e89f256/docs/JOURNAL_FILE_FORMAT.md - - https://github.com/libyal/dtformats/blob/c4fc2b8102702c64b58f145971821986bf74e6c0/documentation/Systemd%20journal%20file%20format.asciidoc - """ # noqa: E501 + - https://github.com/systemd/systemd/blob/main/docs/JOURNAL_FILE_FORMAT.md + - https://github.com/libyal/dtformats/blob/main/documentation/Systemd%20journal%20file%20format.asciidoc + """ def __init__(self, fh: BinaryIO, target: Target): self.fh = fh @@ -321,10 +334,16 @@ def __iter__(self) -> Iterator[dict[str, int | str]]: for offset in self.entry_object_offsets(): self.fh.seek(offset) - if self.header.incompatible_flags & c_journal.IncompatibleFlag.HEADER_INCOMPATIBLE_COMPACT: - entry = c_journal.EntryObject_Compact(self.fh) - else: - entry = c_journal.EntryObject(self.fh) + try: + if self.header.incompatible_flags & c_journal.IncompatibleFlag.HEADER_INCOMPATIBLE_COMPACT: + entry = c_journal.EntryObject_Compact(self.fh) + else: + entry = c_journal.EntryObject(self.fh) + + except EOFError as e: + self.target.log.warning("Unable to read Journal EntryObject at offset %s in: %s", offset, self.fh) + self.target.log.debug("", exc_info=e) + continue event = {} event["ts"] = ts.from_unix_us(entry.realtime) @@ -360,7 +379,7 @@ def __iter__(self) -> Iterator[dict[str, int | str]]: except Exception as e: self.target.log.warning( - "The data object in Journal file %s could not be parsed", + "Data object in Journal file could not be parsed: %s", getattr(self.fh, "name", None), exc_info=e, ) @@ -370,39 +389,44 @@ def __iter__(self) -> Iterator[dict[str, int | str]]: class JournalPlugin(Plugin): + """Systemd Journal plugin.""" + JOURNAL_PATHS = ["/var/log/journal"] # TODO: /run/systemd/journal JOURNAL_GLOB = "*/*.journal*" # The extensions .journal and .journal~ JOURNAL_SIGNATURE = "LPKSHHRH" def __init__(self, target: Target): super().__init__(target) - self.journal_paths = [] + self.journal_files = [] - for _path in self.JOURNAL_PATHS: - self.journal_paths.extend(self.target.fs.path(_path).glob(self.JOURNAL_GLOB)) + for journal_path in self.JOURNAL_PATHS: + self.journal_files.extend(self.target.fs.path(journal_path).glob(self.JOURNAL_GLOB)) def check_compatible(self) -> None: - if not len(self.journal_paths): + if not self.journal_files: raise UnsupportedPluginError("No journald files found") @export(record=JournalRecord) def journal(self) -> Iterator[JournalRecord]: - """Return the content of Systemd Journal log files. + """Return the contents of Systemd Journal log files. References: - https://wiki.archlinux.org/title/Systemd/Journal - - https://github.com/systemd/systemd/blob/9203abf79f1d05fdef9b039e7addf9fc5a27752d/man/systemd.journal-fields.xml - """ # noqa: E501 - + - https://github.com/systemd/systemd/blob/main/man/systemd.journal-fields.xml + """ path_function = self.target.fs.path - for _path in self.journal_paths: - fh = _path.open() + for journal_file in self.journal_files: + + if not journal_file.is_file(): + self.target.log.warning("Unable to parse journal file: %s", journal_file) + continue + fh = journal_file.open() journal = JournalFile(fh, self.target) if not journal.signature == self.JOURNAL_SIGNATURE: - self.target.log.warning("The Journal log file %s has an invalid magic header", _path) + self.target.log.warning("Journal file %s has invalid magic header %s", journal_file, journal.signature) continue for entry in journal: @@ -417,7 +441,7 @@ def journal(self) -> Iterator[JournalRecord]: errno=get_optional(entry.get("errno"), int), invocation_id=entry.get("invocation_id"), user_invocation_id=entry.get("user_invocation_id"), - syslog_facility=get_optional(entry.get("syslog_facility"), int), + syslog_facility=entry.get("syslog_facility"), syslog_identifier=entry.get("syslog_identifier"), syslog_pid=get_optional(entry.get("syslog_pid"), int), syslog_raw=entry.get("syslog_raw"), @@ -456,6 +480,6 @@ def journal(self) -> Iterator[JournalRecord]: udev_devnode=get_optional(entry.get("udev_devnode"), path_function), udev_devlink=get_optional(entry.get("udev_devlink"), path_function), journal_hostname=entry.get("hostname"), - filepath=_path, + source=journal_file, _target=self.target, ) From eb77c5d11487dac27cdd8b5a9664086c14ec3826 Mon Sep 17 00:00:00 2001 From: JSCU-CNI <121175071+JSCU-CNI@users.noreply.github.com> Date: Mon, 7 Oct 2024 14:23:10 +0200 Subject: [PATCH 02/10] implement review feedback --- dissect/target/plugins/os/unix/log/journal.py | 54 ++++++++++--------- 1 file changed, 28 insertions(+), 26 deletions(-) diff --git a/dissect/target/plugins/os/unix/log/journal.py b/dissect/target/plugins/os/unix/log/journal.py index 39423ff5a..239669099 100644 --- a/dissect/target/plugins/os/unix/log/journal.py +++ b/dissect/target/plugins/os/unix/log/journal.py @@ -2,7 +2,7 @@ import logging import lzma -from typing import BinaryIO, Callable, Iterator +from typing import Any, BinaryIO, Callable, Iterator import zstandard from dissect.cstruct import cstruct @@ -260,7 +260,7 @@ c_journal = cstruct().load(journal_def) -def get_optional(value: str, to_type: Callable): +def get_optional(value: str, to_type: Callable) -> Any | None: """Return the value if True, otherwise return None.""" if not value: @@ -345,44 +345,46 @@ def __iter__(self) -> Iterator[dict[str, int | str]]: self.target.log.debug("", exc_info=e) continue - event = {} - event["ts"] = ts.from_unix_us(entry.realtime) - + event = {"ts": ts.from_unix_us(entry.realtime)} for item in entry.items: try: self.fh.seek(item.object_offset) - object = c_journal.ObjectHeader(self.fh) - if object.type == c_journal.ObjectType.OBJECT_DATA: - self.fh.seek(item.object_offset) + if object.type != c_journal.ObjectType.OBJECT_DATA: + continue + + self.fh.seek(item.object_offset) + + if self.header.incompatible_flags & c_journal.IncompatibleFlag.HEADER_INCOMPATIBLE_COMPACT: + data_object = c_journal.DataObject_Compact(self.fh) + else: + data_object = c_journal.DataObject(self.fh) + + data = data_object.payload + + if not data: + continue - if self.header.incompatible_flags & c_journal.IncompatibleFlag.HEADER_INCOMPATIBLE_COMPACT: - data_object = c_journal.DataObject_Compact(self.fh) - else: - data_object = c_journal.DataObject(self.fh) + if data_object.flags & c_journal.ObjectFlag.OBJECT_COMPRESSED_XZ: + data = lzma.decompress(data) - data = data_object.payload + elif data_object.flags & c_journal.ObjectFlag.OBJECT_COMPRESSED_LZ4: + data = lz4.decompress(data[8:]) - if not data: - # If the payload is empty - continue - elif data_object.flags & c_journal.ObjectFlag.OBJECT_COMPRESSED_XZ: - data = lzma.decompress(data) - elif data_object.flags & c_journal.ObjectFlag.OBJECT_COMPRESSED_LZ4: - data = lz4.decompress(data[8:]) - elif data_object.flags & c_journal.ObjectFlag.OBJECT_COMPRESSED_ZSTD: - data = zstandard.decompress(data) + elif data_object.flags & c_journal.ObjectFlag.OBJECT_COMPRESSED_ZSTD: + data = zstandard.decompress(data) - key, value = self.decode_value(data) - event[key] = value + key, value = self.decode_value(data) + event[key] = value except Exception as e: self.target.log.warning( - "Data object in Journal file could not be parsed: %s", + "Journal DataObject could not be parsed at offset %s in %s", + item.object_offset, getattr(self.fh, "name", None), - exc_info=e, ) + self.target.log.debug("", exc_info=e) continue yield event From b8b70b355737b39d05463a9df662d92a0fd686b3 Mon Sep 17 00:00:00 2001 From: JSCU-CNI <121175071+JSCU-CNI@users.noreply.github.com> Date: Mon, 7 Oct 2024 14:24:33 +0200 Subject: [PATCH 03/10] fix linter --- dissect/target/plugins/os/unix/log/journal.py | 1 - 1 file changed, 1 deletion(-) diff --git a/dissect/target/plugins/os/unix/log/journal.py b/dissect/target/plugins/os/unix/log/journal.py index 239669099..b9848c08c 100644 --- a/dissect/target/plugins/os/unix/log/journal.py +++ b/dissect/target/plugins/os/unix/log/journal.py @@ -419,7 +419,6 @@ def journal(self) -> Iterator[JournalRecord]: path_function = self.target.fs.path for journal_file in self.journal_files: - if not journal_file.is_file(): self.target.log.warning("Unable to parse journal file: %s", journal_file) continue From 9f37d330bc2ca1127c9a9e61981379912ba674f1 Mon Sep 17 00:00:00 2001 From: JSCU-CNI <121175071+JSCU-CNI@users.noreply.github.com> Date: Mon, 7 Oct 2024 14:58:51 +0200 Subject: [PATCH 04/10] some more sanity checks --- dissect/target/plugins/os/unix/log/journal.py | 21 +++++++++++++------ .../plugins/os/unix/{ => log}/test_journal.py | 0 2 files changed, 15 insertions(+), 6 deletions(-) rename tests/plugins/os/unix/{ => log}/test_journal.py (100%) diff --git a/dissect/target/plugins/os/unix/log/journal.py b/dissect/target/plugins/os/unix/log/journal.py index b9848c08c..63d155d6e 100644 --- a/dissect/target/plugins/os/unix/log/journal.py +++ b/dissect/target/plugins/os/unix/log/journal.py @@ -78,6 +78,8 @@ ) journal_def = """ +#define HEADER_SIGNATURE "LPKSHHRH" + typedef uint8 uint8_t; typedef uint32 le32_t; typedef uint64 le64_t; @@ -395,7 +397,6 @@ class JournalPlugin(Plugin): JOURNAL_PATHS = ["/var/log/journal"] # TODO: /run/systemd/journal JOURNAL_GLOB = "*/*.journal*" # The extensions .journal and .journal~ - JOURNAL_SIGNATURE = "LPKSHHRH" def __init__(self, target: Target): super().__init__(target) @@ -420,14 +421,22 @@ def journal(self) -> Iterator[JournalRecord]: for journal_file in self.journal_files: if not journal_file.is_file(): - self.target.log.warning("Unable to parse journal file: %s", journal_file) + self.target.log.warning("Unable to parse journal file as it is not a file: %s", journal_file) continue - fh = journal_file.open() - journal = JournalFile(fh, self.target) + try: + fh = journal_file.open() + journal = JournalFile(fh, self.target) + + except Exception as e: + self.target.log.warning("Unable to parse journal file structure: %s", journal_file) + self.target.log.debug("", exc_info=e) + continue - if not journal.signature == self.JOURNAL_SIGNATURE: - self.target.log.warning("Journal file %s has invalid magic header %s", journal_file, journal.signature) + if journal.signature != c_journal.HEADER_SIGNATURE: + self.target.log.warning( + "Journal file has invalid magic header '%s': %s", journal.signature, journal_file + ) continue for entry in journal: diff --git a/tests/plugins/os/unix/test_journal.py b/tests/plugins/os/unix/log/test_journal.py similarity index 100% rename from tests/plugins/os/unix/test_journal.py rename to tests/plugins/os/unix/log/test_journal.py From bd77d5c1001f133f328d7232bd8dfbf9a9c78c9a Mon Sep 17 00:00:00 2001 From: JSCU-CNI <121175071+JSCU-CNI@users.noreply.github.com> Date: Mon, 7 Oct 2024 15:02:28 +0200 Subject: [PATCH 05/10] fix tests --- tests/plugins/os/unix/log/test_journal.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/tests/plugins/os/unix/log/test_journal.py b/tests/plugins/os/unix/log/test_journal.py index 883be854f..82675bb2f 100644 --- a/tests/plugins/os/unix/log/test_journal.py +++ b/tests/plugins/os/unix/log/test_journal.py @@ -1,27 +1,30 @@ from flow.record.fieldtypes import datetime as dt +from dissect.target.filesystem import VirtualFilesystem from dissect.target.plugins.os.unix.log.journal import JournalPlugin +from dissect.target.target import Target from tests._utils import absolute_path -def test_journal_plugin(target_unix, fs_unix): +def test_journal_plugin(target_unix: Target, fs_unix: VirtualFilesystem) -> None: + """test linux systemd journal file parsing.""" + data_file = absolute_path("_data/plugins/os/unix/log/journal/journal") fs_unix.map_file("var/log/journal/1337/user-1000.journal", data_file) - target_unix.add_plugin(JournalPlugin) results = list(target_unix.journal()) - record = results[0] - assert len(results) == 2 + record = results[0] assert record.ts == dt("2023-05-19T16:22:38.841870+00:00") - assert ( - record.message - == "Window manager warning: last_user_time (928062) is greater than comparison timestamp (928031). This most likely represents a buggy client sending inaccurate timestamps in messages such as _NET_ACTIVE_WINDOW. Trying to work around..." # noqa: E501 + assert record.message == ( + "Window manager warning: last_user_time (928062) is greater than comparison timestamp (928031). " + "This most likely represents a buggy client sending inaccurate timestamps in messages such as " + "_NET_ACTIVE_WINDOW. Trying to work around..." ) - assert record.syslog_facility == 3 + assert record.syslog_facility == "3" assert record.syslog_identifier == "gnome-shell" assert record.pid == 2096 assert record.transport == "stdout" - assert str(record.filepath) == "/var/log/journal/1337/user-1000.journal" + assert record.source == "/var/log/journal/1337/user-1000.journal" From a6637e8fc021ab43d8fe92271a052c09fe834732 Mon Sep 17 00:00:00 2001 From: JSCU-CNI <121175071+JSCU-CNI@users.noreply.github.com> Date: Mon, 7 Oct 2024 16:15:34 +0200 Subject: [PATCH 06/10] small performance improvements --- dissect/target/plugins/os/unix/log/journal.py | 159 ++++++++---------- 1 file changed, 71 insertions(+), 88 deletions(-) diff --git a/dissect/target/plugins/os/unix/log/journal.py b/dissect/target/plugins/os/unix/log/journal.py index 63d155d6e..d6db0b9b3 100644 --- a/dissect/target/plugins/os/unix/log/journal.py +++ b/dissect/target/plugins/os/unix/log/journal.py @@ -105,7 +105,7 @@ }; struct Header { - uint8_t signature[8]; + char signature[8]; le32_t compatible_flags; IncompatibleFlag incompatible_flags; State state; @@ -170,7 +170,7 @@ // The first four members are copied from ObjectHeader, so that the size can be used as the length of payload struct DataObject { - ObjectType type; + // ObjectType type; ObjectFlag flags; uint8_t reserved[6]; le64_t size; @@ -186,7 +186,7 @@ // If the HEADER_INCOMPATIBLE_COMPACT flag is set, two extra fields are stored to allow immediate access // to the tail entry array in the DATA object's entry array chain. struct DataObject_Compact { - ObjectType type; + // ObjectType type; ObjectFlag flags; uint8_t reserved[6]; le64_t size; @@ -241,7 +241,7 @@ // The first four members are copied from from ObjectHeader, so that the size can be used as the length of entry_object_offsets struct EntryArrayObject { - ObjectType type; + // ObjectType type; uint8_t flags; uint8_t reserved[6]; le64_t size; @@ -250,7 +250,7 @@ }; struct EntryArrayObject_Compact { - ObjectType type; + // ObjectType type; uint8_t flags; uint8_t reserved[6]; le64_t size; @@ -288,108 +288,97 @@ class JournalFile: def __init__(self, fh: BinaryIO, target: Target): self.fh = fh self.target = target - self.header = c_journal.Header(self.fh) - self.signature = "".join(chr(c) for c in self.header.signature) - self.entry_array_offset = self.header.entry_array_offset - def entry_object_offsets(self) -> Iterator[int]: - """Read object entry arrays.""" + try: + self.header = c_journal.Header(self.fh) + except EOFError as e: + raise ValueError(f"Invalid Systemd Journal file: {str(e)}") - offset = self.entry_array_offset - - # Entry Array with next_entry_array_offset set to 0 is the last in the list - while offset != 0: - self.fh.seek(offset) - - object = c_journal.ObjectHeader(self.fh) - - if object.type == c_journal.ObjectType.OBJECT_ENTRY_ARRAY: - # After the object is checked, read again but with EntryArrayObject instead of ObjectHeader - self.fh.seek(offset) - - if self.header.incompatible_flags & c_journal.IncompatibleFlag.HEADER_INCOMPATIBLE_COMPACT: - entry_array_object = c_journal.EntryArrayObject_Compact(self.fh) - else: - entry_array_object = c_journal.EntryArrayObject(self.fh) - - for entry_object_offset in entry_array_object.entry_object_offsets: - # Check if the offset is not zero and points to nothing - if entry_object_offset: - yield entry_object_offset - - offset = entry_array_object.next_entry_array_offset + if self.header.signature != c_journal.HEADER_SIGNATURE.encode(): + raise ValueError(f"Journal file has invalid magic header '{str(self.header.signature)}'") def decode_value(self, value: bytes) -> tuple[str, str]: - value = value.decode(encoding="utf-8", errors="surrogateescape").strip() - - # Strip leading underscores part of the field name - value = value.lstrip("_") - + """Decode the given bytes to a key value pair.""" + value = value.decode(encoding="utf-8", errors="surrogateescape").strip().lstrip("_") key, value = value.split("=", 1) key = key.lower() - return key, value def __iter__(self) -> Iterator[dict[str, int | str]]: "Iterate over the entry objects to read payloads." - for offset in self.entry_object_offsets(): + offset = self.header.entry_array_offset + while offset != 0: self.fh.seek(offset) - try: - if self.header.incompatible_flags & c_journal.IncompatibleFlag.HEADER_INCOMPATIBLE_COMPACT: - entry = c_journal.EntryObject_Compact(self.fh) - else: - entry = c_journal.EntryObject(self.fh) + if int.from_bytes(self.fh.read(1)) != c_journal.ObjectType.OBJECT_ENTRY_ARRAY: + raise ValueError("Expected OBJECT_ENTRY_ARRAY at offset %s", offset) - except EOFError as e: - self.target.log.warning("Unable to read Journal EntryObject at offset %s in: %s", offset, self.fh) - self.target.log.debug("", exc_info=e) - continue + if self.header.incompatible_flags & c_journal.IncompatibleFlag.HEADER_INCOMPATIBLE_COMPACT: + entry_array_object = c_journal.EntryArrayObject_Compact(self.fh) + else: + entry_array_object = c_journal.EntryArrayObject(self.fh) - event = {"ts": ts.from_unix_us(entry.realtime)} - for item in entry.items: - try: - self.fh.seek(item.object_offset) - object = c_journal.ObjectHeader(self.fh) + for entry_object_offset in entry_array_object.entry_object_offsets: + if entry_object_offset: + yield from self._parse_entry_object(offset=entry_object_offset) - if object.type != c_journal.ObjectType.OBJECT_DATA: - continue + offset = entry_array_object.next_entry_array_offset - self.fh.seek(item.object_offset) + def _parse_entry_object(self, offset: int) -> Iterator[dict]: + self.fh.seek(offset) - if self.header.incompatible_flags & c_journal.IncompatibleFlag.HEADER_INCOMPATIBLE_COMPACT: - data_object = c_journal.DataObject_Compact(self.fh) - else: - data_object = c_journal.DataObject(self.fh) + try: + if self.header.incompatible_flags & c_journal.IncompatibleFlag.HEADER_INCOMPATIBLE_COMPACT: + entry = c_journal.EntryObject_Compact(self.fh) + else: + entry = c_journal.EntryObject(self.fh) - data = data_object.payload + except EOFError as e: + self.target.log.warning("Unable to read Journal EntryObject at offset %s in: %s", offset, self.fh) + self.target.log.debug("", exc_info=e) + return - if not data: - continue + event = {"ts": ts.from_unix_us(entry.realtime)} + for item in entry.items: + try: + self.fh.seek(item.object_offset) - if data_object.flags & c_journal.ObjectFlag.OBJECT_COMPRESSED_XZ: - data = lzma.decompress(data) + if int.from_bytes(self.fh.read(1)) != c_journal.ObjectType.OBJECT_DATA: + continue - elif data_object.flags & c_journal.ObjectFlag.OBJECT_COMPRESSED_LZ4: - data = lz4.decompress(data[8:]) + if self.header.incompatible_flags & c_journal.IncompatibleFlag.HEADER_INCOMPATIBLE_COMPACT: + data_object = c_journal.DataObject_Compact(self.fh) + else: + data_object = c_journal.DataObject(self.fh) - elif data_object.flags & c_journal.ObjectFlag.OBJECT_COMPRESSED_ZSTD: - data = zstandard.decompress(data) + if not data_object.payload: + continue - key, value = self.decode_value(data) - event[key] = value + data = data_object.payload - except Exception as e: - self.target.log.warning( - "Journal DataObject could not be parsed at offset %s in %s", - item.object_offset, - getattr(self.fh, "name", None), - ) - self.target.log.debug("", exc_info=e) - continue + if data_object.flags & c_journal.ObjectFlag.OBJECT_COMPRESSED_XZ: + data = lzma.decompress(data) + + elif data_object.flags & c_journal.ObjectFlag.OBJECT_COMPRESSED_LZ4: + data = lz4.decompress(data[8:]) - yield event + elif data_object.flags & c_journal.ObjectFlag.OBJECT_COMPRESSED_ZSTD: + data = zstandard.decompress(data) + + key, value = self.decode_value(data) + event[key] = value + + except Exception as e: + self.target.log.warning( + "Journal DataObject could not be parsed at offset %s in %s", + item.object_offset, + getattr(self.fh, "name", None), + ) + self.target.log.debug("", exc_info=e) + continue + + yield event class JournalPlugin(Plugin): @@ -429,16 +418,10 @@ def journal(self) -> Iterator[JournalRecord]: journal = JournalFile(fh, self.target) except Exception as e: - self.target.log.warning("Unable to parse journal file structure: %s", journal_file) + self.target.log.warning("Unable to parse journal file structure: %s: %s", journal_file, str(e)) self.target.log.debug("", exc_info=e) continue - if journal.signature != c_journal.HEADER_SIGNATURE: - self.target.log.warning( - "Journal file has invalid magic header '%s': %s", journal.signature, journal_file - ) - continue - for entry in journal: yield JournalRecord( ts=entry.get("ts"), From e47650c6da3fc31f6be465a1327527660e972318 Mon Sep 17 00:00:00 2001 From: JSCU-CNI <121175071+JSCU-CNI@users.noreply.github.com> Date: Wed, 9 Oct 2024 16:38:12 +0200 Subject: [PATCH 07/10] implement review feedback --- dissect/target/plugins/os/unix/log/journal.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/dissect/target/plugins/os/unix/log/journal.py b/dissect/target/plugins/os/unix/log/journal.py index d6db0b9b3..df2ebf1a5 100644 --- a/dissect/target/plugins/os/unix/log/journal.py +++ b/dissect/target/plugins/os/unix/log/journal.py @@ -78,7 +78,7 @@ ) journal_def = """ -#define HEADER_SIGNATURE "LPKSHHRH" +#define HEADER_SIGNATURE b"LPKSHHRH" typedef uint8 uint8_t; typedef uint32 le32_t; @@ -281,9 +281,9 @@ class JournalFile: """Parse Systemd Journal file format. References: - - https://github.com/systemd/systemd/blob/main/docs/JOURNAL_FILE_FORMAT.md - - https://github.com/libyal/dtformats/blob/main/documentation/Systemd%20journal%20file%20format.asciidoc - """ + - https://github.com/systemd/systemd/blob/206f0f397edf1144c63a158fb30f496c3e89f256/docs/JOURNAL_FILE_FORMAT.md + - https://github.com/libyal/dtformats/blob/c4fc2b8102702c64b58f145971821986bf74e6c0/documentation/Systemd%20journal%20file%20format.asciidoc + """ # noqa: E501 def __init__(self, fh: BinaryIO, target: Target): self.fh = fh @@ -294,12 +294,12 @@ def __init__(self, fh: BinaryIO, target: Target): except EOFError as e: raise ValueError(f"Invalid Systemd Journal file: {str(e)}") - if self.header.signature != c_journal.HEADER_SIGNATURE.encode(): + if self.header.signature != c_journal.HEADER_SIGNATURE: raise ValueError(f"Journal file has invalid magic header '{str(self.header.signature)}'") def decode_value(self, value: bytes) -> tuple[str, str]: """Decode the given bytes to a key value pair.""" - value = value.decode(encoding="utf-8", errors="surrogateescape").strip().lstrip("_") + value = value.decode(errors="surrogateescape").strip().lstrip("_") key, value = value.split("=", 1) key = key.lower() return key, value @@ -312,7 +312,7 @@ def __iter__(self) -> Iterator[dict[str, int | str]]: self.fh.seek(offset) if int.from_bytes(self.fh.read(1)) != c_journal.ObjectType.OBJECT_ENTRY_ARRAY: - raise ValueError("Expected OBJECT_ENTRY_ARRAY at offset %s", offset) + raise ValueError(f"Expected OBJECT_ENTRY_ARRAY at offset {offset}") if self.header.incompatible_flags & c_journal.IncompatibleFlag.HEADER_INCOMPATIBLE_COMPACT: entry_array_object = c_journal.EntryArrayObject_Compact(self.fh) @@ -404,8 +404,8 @@ def journal(self) -> Iterator[JournalRecord]: References: - https://wiki.archlinux.org/title/Systemd/Journal - - https://github.com/systemd/systemd/blob/main/man/systemd.journal-fields.xml - """ + - https://github.com/systemd/systemd/blob/9203abf79f1d05fdef9b039e7addf9fc5a27752d/man/systemd.journal-fields.xml + """ # noqa: E501 path_function = self.target.fs.path for journal_file in self.journal_files: From 7490ee3e6179564b430bcd9679e8e84b26a0f364 Mon Sep 17 00:00:00 2001 From: Computer Network Investigation <121175071+JSCU-CNI@users.noreply.github.com> Date: Tue, 15 Oct 2024 13:32:12 +0200 Subject: [PATCH 08/10] Apply suggestions from code review Co-authored-by: Erik Schamper <1254028+Schamper@users.noreply.github.com> --- dissect/target/plugins/os/unix/log/journal.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dissect/target/plugins/os/unix/log/journal.py b/dissect/target/plugins/os/unix/log/journal.py index df2ebf1a5..355d0e96f 100644 --- a/dissect/target/plugins/os/unix/log/journal.py +++ b/dissect/target/plugins/os/unix/log/journal.py @@ -292,10 +292,10 @@ def __init__(self, fh: BinaryIO, target: Target): try: self.header = c_journal.Header(self.fh) except EOFError as e: - raise ValueError(f"Invalid Systemd Journal file: {str(e)}") + raise ValueError(f"Invalid systemd Journal file: {e}") if self.header.signature != c_journal.HEADER_SIGNATURE: - raise ValueError(f"Journal file has invalid magic header '{str(self.header.signature)}'") + raise ValueError(f"Journal file has invalid magic header: {self.header.signature!r}'") def decode_value(self, value: bytes) -> tuple[str, str]: """Decode the given bytes to a key value pair.""" From 7639ef32f2c70084d8a906efa9ba873ec77dc288 Mon Sep 17 00:00:00 2001 From: JSCU-CNI <121175071+JSCU-CNI@users.noreply.github.com> Date: Tue, 15 Oct 2024 17:52:15 +0200 Subject: [PATCH 09/10] fix for older python versions --- dissect/target/plugins/os/unix/log/journal.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dissect/target/plugins/os/unix/log/journal.py b/dissect/target/plugins/os/unix/log/journal.py index 355d0e96f..ec3b20b2c 100644 --- a/dissect/target/plugins/os/unix/log/journal.py +++ b/dissect/target/plugins/os/unix/log/journal.py @@ -311,7 +311,7 @@ def __iter__(self) -> Iterator[dict[str, int | str]]: while offset != 0: self.fh.seek(offset) - if int.from_bytes(self.fh.read(1)) != c_journal.ObjectType.OBJECT_ENTRY_ARRAY: + if int.from_bytes(self.fh.read(1), "little") != c_journal.ObjectType.OBJECT_ENTRY_ARRAY: raise ValueError(f"Expected OBJECT_ENTRY_ARRAY at offset {offset}") if self.header.incompatible_flags & c_journal.IncompatibleFlag.HEADER_INCOMPATIBLE_COMPACT: @@ -344,7 +344,7 @@ def _parse_entry_object(self, offset: int) -> Iterator[dict]: try: self.fh.seek(item.object_offset) - if int.from_bytes(self.fh.read(1)) != c_journal.ObjectType.OBJECT_DATA: + if int.from_bytes(self.fh.read(1), "little") != c_journal.ObjectType.OBJECT_DATA: continue if self.header.incompatible_flags & c_journal.IncompatibleFlag.HEADER_INCOMPATIBLE_COMPACT: From 3a7a100ab7546dc53d533f5790489ff84e5bfd91 Mon Sep 17 00:00:00 2001 From: Computer Network Investigation <121175071+JSCU-CNI@users.noreply.github.com> Date: Wed, 16 Oct 2024 10:39:59 +0200 Subject: [PATCH 10/10] Apply suggestions from code review Co-authored-by: Erik Schamper <1254028+Schamper@users.noreply.github.com> --- dissect/target/plugins/os/unix/log/journal.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dissect/target/plugins/os/unix/log/journal.py b/dissect/target/plugins/os/unix/log/journal.py index ec3b20b2c..278b35026 100644 --- a/dissect/target/plugins/os/unix/log/journal.py +++ b/dissect/target/plugins/os/unix/log/journal.py @@ -311,7 +311,7 @@ def __iter__(self) -> Iterator[dict[str, int | str]]: while offset != 0: self.fh.seek(offset) - if int.from_bytes(self.fh.read(1), "little") != c_journal.ObjectType.OBJECT_ENTRY_ARRAY: + if self.fh.read(1)[0] != c_journal.ObjectType.OBJECT_ENTRY_ARRAY: raise ValueError(f"Expected OBJECT_ENTRY_ARRAY at offset {offset}") if self.header.incompatible_flags & c_journal.IncompatibleFlag.HEADER_INCOMPATIBLE_COMPACT: @@ -344,7 +344,7 @@ def _parse_entry_object(self, offset: int) -> Iterator[dict]: try: self.fh.seek(item.object_offset) - if int.from_bytes(self.fh.read(1), "little") != c_journal.ObjectType.OBJECT_DATA: + if self.fh.read(1)[0] != c_journal.ObjectType.OBJECT_DATA: continue if self.header.incompatible_flags & c_journal.IncompatibleFlag.HEADER_INCOMPATIBLE_COMPACT: