From 074255b99fbd5313abb1c5f57e2f5102bf6dcdfc Mon Sep 17 00:00:00 2001 From: JSCU-CNI <121175071+JSCU-CNI@users.noreply.github.com> Date: Tue, 10 Dec 2024 15:08:15 +0100 Subject: [PATCH 1/5] prevent enumerating entire non-iso formatted syslog files in is_iso_fmt --- dissect/target/plugins/os/unix/log/helpers.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/dissect/target/plugins/os/unix/log/helpers.py b/dissect/target/plugins/os/unix/log/helpers.py index f37c4d8c3..702f4529f 100644 --- a/dissect/target/plugins/os/unix/log/helpers.py +++ b/dissect/target/plugins/os/unix/log/helpers.py @@ -22,12 +22,17 @@ ) -def iso_readlines(file: Path) -> Iterator[tuple[datetime, str]]: +def iso_readlines(file: Path, max_lines: int | None = None) -> Iterator[tuple[datetime, str]]: """Iterator reading the provided log file in ISO format. Mimics ``year_rollover_helper`` behaviour.""" with open_decompress(file, "rt") as fh: - for line in fh: + for i, line in enumerate(fh): + if i > max_lines: + log.debug("Stopping iso_readlines enumeration in %s: max_lines=%s was reached", file, max_lines) + break + if not (match := RE_TS_ISO.match(line)): - log.warning("No timestamp found in one of the lines in %s!", file) + if not max_lines: + log.warning("No timestamp found in one of the lines in %s!", file) log.debug("Skipping line: %s", line) continue @@ -43,4 +48,6 @@ def iso_readlines(file: Path) -> Iterator[tuple[datetime, str]]: def is_iso_fmt(file: Path) -> bool: """Determine if the provided log file uses ISO 8601 timestamp format logging or not.""" - return any(itertools.islice(iso_readlines(file), 0, 2)) + # We do not want to iterate over the entire file so we limit iso_readlines to the first few lines. + # We cannot use islice here since that would only work if the file is ISO formatted and thus yields results. 
+ return any(iso_readlines(file, max_lines=3)) From 9bee5c31e2630bce4a466a4cb8334e1c7071c066 Mon Sep 17 00:00:00 2001 From: JSCU-CNI <121175071+JSCU-CNI@users.noreply.github.com> Date: Tue, 10 Dec 2024 17:18:18 +0100 Subject: [PATCH 2/5] small fix --- dissect/target/plugins/os/unix/log/helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dissect/target/plugins/os/unix/log/helpers.py b/dissect/target/plugins/os/unix/log/helpers.py index 702f4529f..21665aeb8 100644 --- a/dissect/target/plugins/os/unix/log/helpers.py +++ b/dissect/target/plugins/os/unix/log/helpers.py @@ -26,7 +26,7 @@ def iso_readlines(file: Path, max_lines: int | None = None) -> Iterator[tuple[da """Iterator reading the provided log file in ISO format. Mimics ``year_rollover_helper`` behaviour.""" with open_decompress(file, "rt") as fh: for i, line in enumerate(fh): - if i > max_lines: + if isinstance(max_lines, int) and i > max_lines: log.debug("Stopping iso_readlines enumeration in %s: max_lines=%s was reached", file, max_lines) break From e57a1be26b3be45931bbb89f6f54fc12dcabfa1e Mon Sep 17 00:00:00 2001 From: Computer Network Investigation <121175071+JSCU-CNI@users.noreply.github.com> Date: Tue, 17 Dec 2024 16:05:19 +0100 Subject: [PATCH 3/5] Apply suggestions from code review Co-authored-by: Erik Schamper <1254028+Schamper@users.noreply.github.com> --- dissect/target/plugins/os/unix/log/helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dissect/target/plugins/os/unix/log/helpers.py b/dissect/target/plugins/os/unix/log/helpers.py index 21665aeb8..7d2df9c94 100644 --- a/dissect/target/plugins/os/unix/log/helpers.py +++ b/dissect/target/plugins/os/unix/log/helpers.py @@ -26,7 +26,7 @@ def iso_readlines(file: Path, max_lines: int | None = None) -> Iterator[tuple[da """Iterator reading the provided log file in ISO format. 
Mimics ``year_rollover_helper`` behaviour.""" with open_decompress(file, "rt") as fh: for i, line in enumerate(fh): - if isinstance(max_lines, int) and i > max_lines: + if max_lines is not None and i > max_lines: log.debug("Stopping iso_readlines enumeration in %s: max_lines=%s was reached", file, max_lines) break From 5f5aceafe5c719a1129d416c47a25220804dea8f Mon Sep 17 00:00:00 2001 From: JSCU-CNI <121175071+JSCU-CNI@users.noreply.github.com> Date: Tue, 17 Dec 2024 17:55:08 +0100 Subject: [PATCH 4/5] add tests --- dissect/target/plugins/os/unix/log/helpers.py | 2 +- tests/plugins/os/unix/log/test_helpers.py | 41 +++++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 tests/plugins/os/unix/log/test_helpers.py diff --git a/dissect/target/plugins/os/unix/log/helpers.py b/dissect/target/plugins/os/unix/log/helpers.py index 7d2df9c94..834c3225a 100644 --- a/dissect/target/plugins/os/unix/log/helpers.py +++ b/dissect/target/plugins/os/unix/log/helpers.py @@ -26,7 +26,7 @@ def iso_readlines(file: Path, max_lines: int | None = None) -> Iterator[tuple[da """Iterator reading the provided log file in ISO format. Mimics ``year_rollover_helper`` behaviour.""" with open_decompress(file, "rt") as fh: for i, line in enumerate(fh): - if max_lines is not None and i > max_lines: + if max_lines is not None and i >= max_lines: log.debug("Stopping iso_readlines enumeration in %s: max_lines=%s was reached", file, max_lines) break diff --git a/tests/plugins/os/unix/log/test_helpers.py b/tests/plugins/os/unix/log/test_helpers.py new file mode 100644 index 000000000..f01121920 --- /dev/null +++ b/tests/plugins/os/unix/log/test_helpers.py @@ -0,0 +1,41 @@ +import gzip +from io import BytesIO +import textwrap +import pytest + +from dissect.target.filesystem import VirtualFilesystem +from dissect.target.plugins.os.unix.log.helpers import is_iso_fmt, iso_readlines + +syslog = """\ +Dec 31 03:14:15 localhost systemd[1]: Starting Journal Service... 
+Jan 1 13:21:34 localhost systemd: Stopped target Swap. +Jan 2 03:14:15 localhost systemd[1]: Starting Journal Service... +Jan 3 13:21:34 localhost systemd: Stopped target Swap. +2024-12-31T13:37:00.123456+02:00 hostname systemd[1]: Started anacron.service - Run anacron jobs. +2024-12-31T13:37:00.123456+02:00 hostname anacron[1337]: Anacron 2.3 started on 2024-12-31 +2024-12-31T13:37:00.123456+02:00 hostname anacron[1337]: Normal exit (0 jobs run) +2024-12-31T13:37:00.123456+02:00 hostname systemd[1]: anacron.service: Deactivated successfully. +""" + + +@pytest.mark.parametrize( + "max_lines, expected_return_value", + [ + (3, False), + (4, False), + (5, True), + (9, True), + ], +) +def test_iso_readlines_max_lines(fs_unix: VirtualFilesystem, max_lines: int, expected_return_value: bool) -> None: + """assert that iso_readlines does not parse more than the provided max_lines""" + + fs_unix.map_file_fh("/var/log/syslog.2", BytesIO(gzip.compress(textwrap.dedent(syslog).encode()))) + assert any(iso_readlines(fs_unix.path("/var/log/syslog.2"), max_lines)) == expected_return_value + + +def test_is_iso_fmt(fs_unix: VirtualFilesystem) -> None: + """assert that is_iso_fmt does not parse more than three lines""" + + fs_unix.map_file_fh("/var/log/syslog.3", BytesIO(gzip.compress(textwrap.dedent(syslog).encode()))) + assert not is_iso_fmt(fs_unix.path("/var/log/syslog.3")) From e3b61996d2199df19ee6d7f5f68ef25c3152a882 Mon Sep 17 00:00:00 2001 From: JSCU-CNI <121175071+JSCU-CNI@users.noreply.github.com> Date: Wed, 8 Jan 2025 17:11:42 +0100 Subject: [PATCH 5/5] fix linter --- dissect/target/plugins/os/unix/log/helpers.py | 3 ++- tests/plugins/os/unix/log/test_helpers.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/dissect/target/plugins/os/unix/log/helpers.py b/dissect/target/plugins/os/unix/log/helpers.py index 834c3225a..01b7ee863 100644 --- a/dissect/target/plugins/os/unix/log/helpers.py +++ b/dissect/target/plugins/os/unix/log/helpers.py @@ -1,4 
+1,5 @@ -import itertools +from __future__ import annotations + import logging import re from datetime import datetime diff --git a/tests/plugins/os/unix/log/test_helpers.py b/tests/plugins/os/unix/log/test_helpers.py index f01121920..fabba5c80 100644 --- a/tests/plugins/os/unix/log/test_helpers.py +++ b/tests/plugins/os/unix/log/test_helpers.py @@ -1,6 +1,7 @@ import gzip -from io import BytesIO import textwrap +from io import BytesIO + import pytest from dissect.target.filesystem import VirtualFilesystem