Skip to content

Report carve dir #1017

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Dec 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 10 additions & 4 deletions .envrc
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
watch_file flake.nix flake.lock devenv.nix
use nix \
--option extra-substituters "https://unblob.cachix.org" \
--option extra-trusted-public-keys "unblob.cachix.org-1:5kWA6DwOg176rSqU8TOTBXWxsDB4LoCMfGfTgL5qCAE="
# Source further custom features from .envrc.user if it exists
# Also allows users to disable/override features in this file
source_env_if_exists .envrc.user

if ${UNBLOB_USE_DEVENV:-true}; then
watch_file flake.nix flake.lock devenv.nix
use nix \
--option extra-substituters "https://unblob.cachix.org" \
--option extra-trusted-public-keys "unblob.cachix.org-1:5kWA6DwOg176rSqU8TOTBXWxsDB4LoCMfGfTgL5qCAE="
fi
1 change: 1 addition & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
tests/integration/** filter=lfs diff=lfs merge=lfs -text
tests/files/** filter=lfs diff=lfs merge=lfs -text
3 changes: 3 additions & 0 deletions tests/files/suffixes/__input__/chunks
Git LFS file not shown
3 changes: 3 additions & 0 deletions tests/files/suffixes/__input__/collisions.zip
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
3 changes: 3 additions & 0 deletions tests/files/suffixes/chunks
Git LFS file not shown
2 changes: 1 addition & 1 deletion tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,7 @@ def test_keep_extracted_chunks(
[
pytest.param([], 5, id="skip-extension-empty"),
pytest.param([""], 5, id="skip-zip-extension-empty-suffix"),
pytest.param([".zip"], 1, id="skip-extension-zip"),
pytest.param([".zip"], 0, id="skip-extension-zip"),
pytest.param([".rlib"], 5, id="skip-extension-rlib"),
],
)
Expand Down
6 changes: 3 additions & 3 deletions tests/test_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,11 @@
)
from unblob.report import (
ChunkReport,
ExtractDirectoryExistsReport,
FileMagicReport,
HashReport,
MultiFileCollisionReport,
MultiFileReport,
OutputDirectoryExistsReport,
RandomnessMeasurements,
RandomnessReport,
StatReport,
Expand Down Expand Up @@ -350,7 +350,7 @@ def test_process_file_prevents_double_extracts(tmp_path: Path, fw: Path):

# we expect exactly 1 problem reported, related to the extraction of "internal.zip"
[report] = process_result.errors
assert isinstance(report, ExtractDirectoryExistsReport)
assert isinstance(report, OutputDirectoryExistsReport)
assert report.path.name == "internal.zip_extract"

# the rest should be the same, except that the extraction is shifted with one extra directory
Expand Down Expand Up @@ -819,7 +819,7 @@ def test_multi_file_extract_dir(
multi_file_reports = task_result_by_path[directory].filter_reports(MultiFileReport)
assert multi_file_reports
assert any(
isinstance(report, ExtractDirectoryExistsReport)
isinstance(report, OutputDirectoryExistsReport)
for report in multi_file_reports[0].extraction_reports
)

Expand Down
100 changes: 100 additions & 0 deletions tests/test_processing_suffixes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
from pathlib import Path

import pytest

from unblob.processing import ExtractionConfig, process_file
from unblob.report import OutputDirectoryExistsReport
from unblob.testing import check_output_is_the_same

TEST_DATA_PATH = Path(__file__).parent / "files/suffixes"


def _patch(extraction_config: ExtractionConfig, carve_suffix: str, extract_suffix: str):
    """Apply the per-test overrides to ``extraction_config`` in place.

    Sets ``keep_extracted_chunks`` to ``False`` and replaces the carve and
    extract suffixes with the supplied values. Nothing is returned; the
    config object passed in is mutated.
    """
    overrides = {
        "keep_extracted_chunks": False,
        "carve_suffix": carve_suffix,
        "extract_suffix": extract_suffix,
    }
    # dicts preserve insertion order, so the attributes are assigned in the
    # same sequence as three explicit assignment statements would be.
    for attribute, value in overrides.items():
        setattr(extraction_config, attribute, value)


@pytest.mark.parametrize(
    "carve_suffix,extract_suffix,output_root_dir_name",
    [
        ("_extract", "_extract", "defaults"),
        ("_c", "_e", "_c_e"),
        ("_carve", "_extract", "_carve_extract"),
    ],
)
def test_top_level_carve(
    carve_suffix: str,
    extract_suffix: str,
    output_root_dir_name: str,
    extraction_config: ExtractionConfig,
):
    """Process the ``chunks`` input and compare the output tree per suffix pair.

    For each parametrized (carve suffix, extract suffix) combination the test
    expects no errors from ``process_file`` and an output tree identical to
    the stored ``__outputs__/chunks/<output_root_dir_name>`` directory.
    """
    _patch(extraction_config, carve_suffix, extract_suffix)

    source = TEST_DATA_PATH / "__input__/chunks"
    carve_name = source.name + extraction_config.carve_suffix
    extract_name = source.name + extraction_config.extract_suffix
    expected_tree = TEST_DATA_PATH / "__outputs__/chunks" / output_root_dir_name

    result = process_file(extraction_config, source)

    assert result.errors == []

    # When the two suffixes differ, no entry named after the extract suffix
    # may exist directly under the extraction root.
    if carve_name != extract_name:
        extract_path = extraction_config.extract_root / extract_name
        assert not extract_path.exists()

    check_output_is_the_same(expected_tree, extraction_config.extract_root)


# Expected paths of OutputDirectoryExistsReport errors, keyed by the
# (carve_suffix, extract_suffix) pair under test. Paths are POSIX-style and
# relative to the extraction root, which is the form
# test_top_level_extract_and_collisions compares them in. Suffix pairs that
# are absent here are looked up with an empty-set default by that test.
EXPECTED_COLLISION_PATHS: "dict[tuple[str, str], set]" = {
    ("_extract", "_extract"): {
        "collisions.zip_extract/chunks_carve/0-160.gzip_extract",
    },
    ("_carve", "_extract"): {
        "collisions.zip_extract/chunks_carve",
        "collisions.zip_extract/chunks_carve/0-160.gzip_extract",
    },
}


@pytest.mark.parametrize(
    "carve_suffix,extract_suffix,output_root_dir_name",
    [
        ("_extract", "_extract", "defaults"),
        ("_c", "_e", "_c_e"),
        ("_carve", "_extract", "_carve_extract"),
    ],
)
def test_top_level_extract_and_collisions(
    carve_suffix: str,
    extract_suffix: str,
    output_root_dir_name: str,
    extraction_config: ExtractionConfig,
):
    """Process ``collisions.zip`` and verify the reported directory collisions.

    The only errors allowed are ``OutputDirectoryExistsReport`` instances whose
    paths match ``EXPECTED_COLLISION_PATHS`` for the suffix pair (an empty set
    when the pair is not listed). The resulting output tree must equal the
    stored ``__outputs__/collisions.zip/<output_root_dir_name>`` directory.
    """
    _patch(extraction_config, carve_suffix, extract_suffix)

    source = TEST_DATA_PATH / "__input__/collisions.zip"
    carve_name = source.name + extraction_config.carve_suffix
    extract_name = source.name + extraction_config.extract_suffix
    expected_tree = (
        TEST_DATA_PATH / "__outputs__/collisions.zip" / output_root_dir_name
    )

    result = process_file(extraction_config, source)

    # check collision problems - the input was prepared to have collisions
    # during both the carving and extracting phases
    collision_paths = set()
    for error in result.errors:
        if isinstance(error, OutputDirectoryExistsReport):
            relative = error.path.relative_to(extraction_config.extract_root)
            collision_paths.add(relative.as_posix())

    expected_paths = EXPECTED_COLLISION_PATHS.get(
        (carve_suffix, extract_suffix), set()
    )
    assert collision_paths == expected_paths
    # every reported error must be one of the collision reports counted above
    assert len(result.errors) == len(collision_paths)

    # When the two suffixes differ, no entry named after the carve suffix
    # may exist directly under the extraction root.
    if carve_name != extract_name:
        carve_path = extraction_config.extract_root / carve_name
        assert not carve_path.exists()

    check_output_is_the_same(expected_tree, extraction_config.extract_root)
2 changes: 2 additions & 0 deletions tests/test_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from unblob.models import ProcessResult, Task, TaskResult
from unblob.processing import ExtractionConfig, process_file
from unblob.report import (
CarveDirectoryReport,
ChunkReport,
FileMagicReport,
HashReport,
Expand Down Expand Up @@ -120,6 +121,7 @@ def hello_kitty_task_results(
sha1="febca6ed75dc02e0def065e7b08f1cca87b57c74",
sha256="144d8b2c949cb4943128aa0081153bcba4f38eb0ba26119cc06ca1563c4999e1",
),
CarveDirectoryReport(carve_dir=extract_root / "hello_kitty_extract"),
UnknownChunkReport(
id=ANY,
start_offset=0,
Expand Down
Loading
Loading