#!/usr/bin/env python3
"""
Validate that a wheel's contents match its RECORD file.

This script checks that:
1. Every file in the wheel is listed in RECORD
2. Every file listed in RECORD exists in the wheel
3. Every file other than RECORD itself has a recorded hash, and that hash
   matches the file's contents
4. Recorded file sizes match the actual sizes (entries without a recorded
   size are not size-checked)
"""
11+
12+ import argparse
13+ import base64
14+ import csv
15+ import hashlib
16+ import io
17+ import sys
18+ import zipfile
19+ from pathlib import Path
20+
21+
def compute_hash(data, algorithm="sha256"):
    """Return the urlsafe-base64, unpadded digest of *data*.

    This is the encoding used for the hash column of a RECORD file
    (the part after ``<algorithm>=``).

    Args:
        data: Bytes to hash.
        algorithm: Name of a hashlib algorithm. Defaults to ``"sha256"``,
            which keeps the original single-argument behaviour.

    Returns:
        The digest as an ASCII string, urlsafe base64 with trailing ``=``
        padding removed.

    Raises:
        ValueError: If hashlib does not know *algorithm*.
    """
    hash_digest = hashlib.new(algorithm, data).digest()
    # RECORD stores digests without base64 padding, so strip it.
    return base64.urlsafe_b64encode(hash_digest).rstrip(b'=').decode('ascii')
26+
27+
# Hash algorithms accepted in RECORD entries (sha256 or stronger).
_SUPPORTED_HASH_ALGORITHMS = ('sha256', 'sha384', 'sha512')


def _urlsafe_b64_digest(algo, data):
    """Return the unpadded urlsafe-base64 digest of *data* using *algo*."""
    digest = hashlib.new(algo, data).digest()
    return base64.urlsafe_b64encode(digest).rstrip(b'=').decode('ascii')


def _parse_record(record_content, errors):
    """Parse RECORD CSV text into ``{path: {'hash': str, 'size': int | None}}``.

    Problems with individual rows are appended to *errors* instead of
    raising, so one bad row does not hide the rest of the report.
    """
    entries = {}
    reader = csv.reader(io.StringIO(record_content))
    for row in reader:
        if not row:
            # Blank lines carry no information.
            continue
        if len(row) < 3:
            errors.append(
                f"Malformed RECORD row at line {reader.line_num} "
                f"(expected path,hash,size): {row!r}"
            )
            continue

        file_path, hash_str, size_str = row[0], row[1], row[2]
        size = None
        if size_str:
            try:
                size = int(size_str)
            except ValueError:
                errors.append(f"Invalid size for {file_path}: {size_str!r}")
        entries[file_path] = {'hash': hash_str, 'size': size}
    return entries


def _check_entry(file_path, record_entry, file_data, errors):
    """Compare one file's bytes against its RECORD size and hash."""
    recorded_size = record_entry['size']
    if recorded_size is not None:
        actual_size = len(file_data)
        if actual_size != recorded_size:
            errors.append(
                f"Size mismatch for {file_path} : "
                f"RECORD says {recorded_size} , actual is {actual_size} "
            )

    recorded_hash = record_entry['hash']
    if not recorded_hash:
        errors.append(f"No hash recorded for {file_path} ")
        return

    # Hash column format is "<algorithm>=<urlsafe-base64-nopad digest>".
    algo, separator, expected_hash = recorded_hash.partition('=')
    if not separator:
        errors.append(f"Invalid hash format for {file_path} : {recorded_hash} ")
    elif algo not in _SUPPORTED_HASH_ALGORITHMS:
        errors.append(
            f"Unknown hash algorithm {algo} for {file_path} "
            f"(expected one of {', '.join(_SUPPORTED_HASH_ALGORITHMS)})"
        )
    else:
        actual_hash = _urlsafe_b64_digest(algo, file_data)
        if actual_hash != expected_hash:
            errors.append(
                f"Hash mismatch for {file_path} : "
                f"RECORD says {expected_hash} , actual is {actual_hash} "
            )


def validate_wheel(wheel_path):
    """Validate that a wheel's contents match its RECORD file.

    Checks that every archive member is listed in RECORD, that every
    RECORD entry exists in the archive, and that recorded sizes and hashes
    match the actual file contents. RECORD itself is exempt from the
    size/hash check, and its detached signature files (``RECORD.jws`` /
    ``RECORD.p7s``) are allowed to be absent from RECORD.

    Args:
        wheel_path: Path (or binary file-like object) accepted by
            ``zipfile.ZipFile``.

    Returns:
        A list of human-readable error strings; empty when the wheel is
        consistent with its RECORD.

    Raises:
        zipfile.BadZipFile: If *wheel_path* is not a valid zip archive.
        OSError: If the file cannot be opened or read.
    """
    errors = []

    with zipfile.ZipFile(wheel_path, 'r') as wheel:
        names = wheel.namelist()

        # The wheel's own RECORD lives in the root-level *.dist-info
        # directory; requiring exactly one '/' avoids picking up the RECORD
        # of a package vendored deeper inside the archive.
        record_path = next(
            (
                name for name in names
                if name.endswith('.dist-info/RECORD') and name.count('/') == 1
            ),
            None,
        )
        if not record_path:
            errors.append("No RECORD file found in wheel")
            return errors

        try:
            record_content = wheel.read(record_path).decode('utf-8')
        except UnicodeDecodeError as exc:
            errors.append(f"RECORD file is not valid UTF-8: {exc}")
            return errors

        record_entries = _parse_record(record_content, errors)

        # Directory members end with '/' and are not listed in RECORD.
        wheel_files = {name for name in names if not name.endswith('/')}
        record_files = set(record_entries)

        # Signature files are produced after RECORD is written, so they are
        # legitimately absent from it.
        signature_files = {record_path + '.jws', record_path + '.p7s'}

        for f in sorted(wheel_files - record_files - signature_files):
            errors.append(f"File in wheel but not in RECORD: {f} ")

        for f in sorted(record_files - wheel_files):
            errors.append(f"File in RECORD but not in wheel: {f} ")

        # Sorted so the error report is deterministic across runs.
        for file_path in sorted(record_files & wheel_files):
            # RECORD cannot contain a hash of itself, so it is not checked.
            if file_path == record_path:
                continue
            _check_entry(
                file_path,
                record_entries[file_path],
                wheel.read(file_path),
                errors,
            )

    return errors
119+
120+
def main():
    """Command-line entry point: validate the wheel named on the command line.

    Parses ``sys.argv`` for a single positional ``wheel`` argument, runs
    ``validate_wheel`` on it, and prints the outcome (errors go to stderr).

    Returns:
        Process exit status: ``0`` when the wheel is valid, ``1`` when the
        file is missing, unreadable, or fails validation.
    """
    parser = argparse.ArgumentParser(description="Validate wheel RECORD file matches contents")
    parser.add_argument("wheel", help="Path to wheel file to validate")

    args = parser.parse_args()

    wheel_path = Path(args.wheel)
    # is_file() also rejects directories, which exists() would let through
    # only to fail later when opened as a zip archive.
    if not wheel_path.is_file():
        print(f"Error: Wheel file not found: {wheel_path} ", file=sys.stderr)
        return 1

    print(f"Validating {wheel_path.name} ...")
    try:
        errors = validate_wheel(wheel_path)
    except (zipfile.BadZipFile, OSError) as exc:
        # A corrupt or unreadable archive is a user-facing failure, not a
        # reason to dump a traceback.
        print(f"Error: Could not read wheel {wheel_path}: {exc}", file=sys.stderr)
        return 1

    if errors:
        print(f"\n ❌ Found {len(errors)} error(s):", file=sys.stderr)
        for error in errors:
            print(f" - {error} ", file=sys.stderr)
        return 1

    print("✅ Wheel validation passed!")
    return 0
143+
144+
# Run the CLI only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
0 commit comments