Skip to content

Commit

Permalink
Merge pull request #65 from randovania/feature/compare-paks
Browse files: browse the repository at this point in the history
Add compare_paks.py
  • Loading branch information
henriquegemignani authored Jul 7, 2023
2 parents 9788680 + b119ad1 commit b7fdb77
Showing 1 changed file with 120 additions and 0 deletions.
120 changes: 120 additions & 0 deletions tools/compare_paks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
import argparse
import collections
import dataclasses
import pprint
from pathlib import Path

from retro_data_structures.base_resource import AssetId
from retro_data_structures.formats import Pak
from retro_data_structures.game_check import Game


@dataclasses.dataclass()
class PakMetrics:
    """Per-PAK statistics used when comparing two PAK files."""

    # How many file entries in the PAK carry each asset id.
    id_occurrences: dict[AssetId, int]

    @classmethod
    def get_metrics(cls, pak: Pak) -> "PakMetrics":
        """Build the metrics for *pak* by counting its file entries per asset id.

        Args:
            pak: The parsed PAK to inspect.

        Returns:
            A ``PakMetrics`` whose ``id_occurrences`` maps every asset id found
            in the PAK to the number of entries using it.
        """
        # A Counter (rather than defaultdict(int)) returns 0 for an unknown id
        # without inserting it, so a stray lookup can never add a phantom id
        # to the key set that callers treat as "ids present in this PAK".
        # NOTE(review): this reads the private ``_raw`` attribute of Pak;
        # confirm there is no public accessor for the file list.
        return cls(
            id_occurrences=collections.Counter(
                file.asset_id for file in pak._raw.files
            ),
        )


def compare(pak_a_path: Path, pak_b_path: Path, game: Game):
    """Parse two PAK files and print how their contents differ.

    Up to four sections are printed, each only when it is non-empty:

    * ids that appear only in A,
    * ids that appear only in B,
    * ids present in both but with a different number of entries,
    * ids present in both, with the same number of entries, whose assets
      compare unequal.

    Ids are listed in ascending order so that two runs over the same inputs
    produce identical, diff-friendly output.

    Args:
        pak_a_path: Path of the first PAK file ("a").
        pak_b_path: Path of the second PAK file ("b").
        game: Game passed to ``Pak.parse`` as ``target_game`` for both files.
    """
    pak_a = Pak.parse(pak_a_path.read_bytes(), target_game=game)
    pak_b = Pak.parse(pak_b_path.read_bytes(), target_game=game)

    # named_assets maps name -> asset id; invert it for display purposes.
    # B is processed last, so its name wins when both paks name the same id.
    names = {}
    for pak in (pak_a, pak_b):
        for name, asset in pak.named_assets.items():
            names[asset] = name

    def format_id(i):
        """Render an asset id as hex, followed by its name when one is known."""
        s = f"0x{i:08X}"
        if i in names:
            s += f" ({names[i]})"
        return s

    def report(title, ids, width):
        """Print one titled section listing *ids* in sorted order, if any."""
        if ids:
            print(f"===== {title} =====")
            pprint.pp([format_id(i) for i in sorted(ids)], width=width)

    a_metrics = PakMetrics.get_metrics(pak_a)
    b_metrics = PakMetrics.get_metrics(pak_b)

    a_ids_set = set(a_metrics.id_occurrences)
    b_ids_set = set(b_metrics.id_occurrences)

    report("ids only in a", a_ids_set - b_ids_set, 200)
    report("ids only in b", b_ids_set - a_ids_set, 200)

    different_occurrences = set()
    different_body = set()

    for asset in sorted(a_ids_set & b_ids_set):
        if a_metrics.id_occurrences[asset] != b_metrics.id_occurrences[asset]:
            different_occurrences.add(asset)
            continue

        # NOTE(review): can_be_compressed=True presumably returns the asset as
        # stored, without decompressing it - confirm against Pak.get_asset.
        a_asset = pak_a.get_asset(asset, can_be_compressed=True)
        b_asset = pak_b.get_asset(asset, can_be_compressed=True)
        if a_asset == b_asset:
            continue

        # When only the `compressed` flag differs between the two sides, fall
        # back to comparing what get_asset returns by default; equal results
        # there mean the asset is not reported as different.
        if (
            a_asset.compressed != b_asset.compressed
            and pak_a.get_asset(asset) == pak_b.get_asset(asset)
        ):
            continue

        different_body.add(asset)

    report("different occurrences", different_occurrences, 120)
    report("different bodies", different_body, 120)


def main():
    """Command-line entry point: compare two PAK files or two trees of PAKs.

    Without ``--recursive`` the two positional arguments are compared directly
    as PAK files. With ``--recursive`` they are treated as directories: every
    ``*.pak`` found below each root is matched by its path relative to that
    root, paks missing from either side are listed, and each pak present on
    both sides is compared.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--game",
        required=True,
        choices=[g.name for g in Game],
        help="game the paks belong to (used when parsing them)",
    )
    parser.add_argument(
        "--recursive",
        action="store_true",
        help="treat pak_a and pak_b as directories and compare every *.pak below them",
    )
    parser.add_argument("pak_a", type=Path)
    parser.add_argument("pak_b", type=Path)
    args = parser.parse_args()
    # `choices` restricts args.game to member names, so the lookup cannot fail.
    game = Game[args.game]

    root_a: Path = args.pak_a
    root_b: Path = args.pak_b

    if args.recursive:
        exists_in_a = {pak.relative_to(root_a).as_posix() for pak in root_a.rglob("*.pak")}
        exists_in_b = {pak.relative_to(root_b).as_posix() for pak in root_b.rglob("*.pak")}

        # Sorted lists instead of raw sets: the iteration order of a set of
        # strings changes between interpreter runs (hash randomisation), which
        # made the output impossible to diff.
        only_in_a = sorted(exists_in_a - exists_in_b)
        if only_in_a:
            print(f"Paks only in A: {only_in_a}")

        only_in_b = sorted(exists_in_b - exists_in_a)
        if only_in_b:
            print(f"Paks only in B: {only_in_b}")

        for pak in sorted(exists_in_a & exists_in_b):
            # NOTE(review): the prefix is tested against the path relative to
            # the root, so only paks directly under the root whose file name
            # starts with "Metroid" are skipped; the reason for skipping them
            # is not visible here - confirm intent.
            if pak.startswith("Metroid"):
                continue
            print(f">> Checking {pak}")
            compare(root_a / pak, root_b / pak, game)
    else:
        compare(root_a, root_b, game)
    print("I'm DONE")


if __name__ == '__main__':
    # Run the CLI only when executed as a script, not when imported.
    main()

0 comments on commit b7fdb77

Please sign in to comment.