Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add compare_paks.py #65

Merged
merged 1 commit into from
Jul 7, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 120 additions & 0 deletions tools/compare_paks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
import argparse
import collections
import dataclasses
import pprint
from pathlib import Path

from retro_data_structures.base_resource import AssetId
from retro_data_structures.formats import Pak
from retro_data_structures.game_check import Game


@dataclasses.dataclass()
class PakMetrics:
id_occurrences: dict[AssetId, int]

@classmethod
def get_metrics(cls, pak: Pak):
id_occurrences = collections.defaultdict(int)
for file in pak._raw.files:
id_occurrences[file.asset_id] += 1

return cls(
id_occurrences=id_occurrences,
)


def compare(pak_a_path: Path, pak_b_path: Path, game: Game):
pak_a = Pak.parse(pak_a_path.read_bytes(), target_game=game)
pak_b = Pak.parse(pak_b_path.read_bytes(), target_game=game)

names = {}

for name, asset in pak_a.named_assets.items():
names[asset] = name
for name, asset in pak_b.named_assets.items():
names[asset] = name

def format_id(i):
s = f"0x{i:08X}"
if i in names:
s += f" ({names[i]})"
return s

a_metrics = PakMetrics.get_metrics(pak_a)
b_metrics = PakMetrics.get_metrics(pak_b)

a_ids_set = set(a_metrics.id_occurrences.keys())
b_ids_set = set(b_metrics.id_occurrences.keys())
both_ids = a_ids_set.intersection(b_ids_set)

a_only = a_ids_set - b_ids_set
b_only = b_ids_set - a_ids_set

if a_only:
print("===== ids only in a =====")
pprint.pp(list(map(format_id, a_only)), width=200)

if b_only:
print("===== ids only in b =====")
pprint.pp(list(map(format_id, b_only)), width=200)

different_occurrences = set()
different_body = set()

for asset in both_ids:
if a_metrics.id_occurrences[asset] != b_metrics.id_occurrences[asset]:
different_occurrences.add(asset)
else:
a_asset = pak_a.get_asset(asset, can_be_compressed=True)
b_asset = pak_b.get_asset(asset, can_be_compressed=True)
if a_asset != b_asset:
if a_asset.compressed != b_asset.compressed:
if pak_a.get_asset(asset) == pak_b.get_asset(asset):
continue
different_body.add(asset)

if different_occurrences:
print("===== different occurrences =====")
pprint.pp(list(map(format_id, different_occurrences)), width=120)

if different_body:
print("===== different bodies =====")
pprint.pp(list(map(format_id, different_body)), width=120)


def main():
parser = argparse.ArgumentParser()
parser.add_argument("--game", required=True, choices=[g.name for g in Game])
parser.add_argument("--recursive", action="store_true")
parser.add_argument("pak_a", type=Path)
parser.add_argument("pak_b", type=Path)
args = parser.parse_args()
game = getattr(Game, args.game)

root_a: Path = args.pak_a
root_b: Path = args.pak_b

if args.recursive:
exists_in_a = {pak.relative_to(root_a).as_posix() for pak in root_a.rglob("*.pak")}
exists_in_b = {pak.relative_to(root_b).as_posix() for pak in root_b.rglob("*.pak")}

if exists_in_a - exists_in_b:
print(f"Paks only in A: {exists_in_a - exists_in_b}")

if exists_in_b - exists_in_a:
print(f"Paks only in B: {exists_in_b - exists_in_a}")

for pak in sorted(exists_in_a & exists_in_b):
assert isinstance(pak, str)
if pak.startswith("Metroid"):
continue
print(f">> Checking {pak}")
compare(root_a.joinpath(pak), root_b.joinpath(pak), game)
else:
compare(root_a, root_b, game)
print("I'm DONE")


if __name__ == '__main__':
main()