Skip to content

Commit 2eb054d

Browse files
committed
add wheel verification to build job
1 parent 7d758f4 commit 2eb054d

File tree

2 files changed

+155
-0
lines changed

2 files changed

+155
-0
lines changed

.github/workflows/build_python_3.yml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,15 @@ jobs:
115115
fi
116116
done
117117
118+
- name: Validate wheel RECORD files
  run: |
    # Check every built wheel's contents against its RECORD manifest.
    for wheel in ./wheelhouse/*.whl; do
      # An unmatched glob stays literal, so skip anything that is not a file.
      if [ -f "$wheel" ]; then
        # Quote the inner expansion so unusual file names are not word-split.
        echo "Validating $(basename "$wheel")..."
        python scripts/validate_wheel.py "$wheel"
      fi
    done
126+
118127
- if: runner.os != 'Windows'
119128
run: |
120129
echo "ARTIFACT_NAME=${{ matrix.only }}" >> $GITHUB_ENV

scripts/validate_wheel.py

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Validate that a wheel's contents match its RECORD file.
4+
5+
This script checks:
6+
1. All files in the wheel are listed in RECORD
7+
2. All files in RECORD exist in the wheel
8+
3. File hashes match (every file except RECORD itself must have a sha256 hash)
9+
4. File sizes match
10+
"""
11+
12+
import argparse
13+
import base64
14+
import csv
15+
import hashlib
16+
import io
17+
import sys
18+
import zipfile
19+
from pathlib import Path
20+
21+
22+
def compute_hash(data, algorithm="sha256"):
    """Return the unpadded urlsafe-base64 digest of *data*.

    This is the encoding used for hashes in a wheel's RECORD file.

    Args:
        data: Bytes to hash.
        algorithm: Name of a hashlib algorithm. Defaults to ``"sha256"``,
            which keeps existing one-argument callers unchanged.

    Returns:
        The digest as an ASCII string with trailing ``=`` padding removed.

    Raises:
        ValueError: If hashlib does not support *algorithm*.
    """
    hash_digest = hashlib.new(algorithm, data).digest()
    # RECORD stores hashes without base64 padding, so strip it here.
    return base64.urlsafe_b64encode(hash_digest).rstrip(b'=').decode('ascii')
26+
27+
28+
def _find_record_path(names):
    """Return the archive path of the wheel's RECORD file, or None if absent."""
    candidates = [name for name in names if name.endswith('.dist-info/RECORD')]
    # The wheel's own RECORD lives in the top-level *.dist-info directory.
    # A vendored package may carry a nested *.dist-info/RECORD, so prefer the
    # top-level match and only fall back to the first nested one.
    for name in candidates:
        if name.count('/') == 1:
            return name
    return candidates[0] if candidates else None


def _parse_record(record_text, errors):
    """Parse RECORD CSV text into ``{path: {'hash': str, 'size': int or None}}``.

    Problems found while parsing are appended to *errors* instead of raising.
    """
    entries = {}
    for row in csv.reader(io.StringIO(record_text)):
        if not row:
            continue
        if len(row) < 3:
            errors.append(f"Malformed RECORD row (expected 3 fields): {row}")
            continue

        file_path, hash_str, size_str = row[0], row[1], row[2]
        size = None
        if size_str:
            try:
                size = int(size_str)
            except ValueError:
                # Report the bad value rather than aborting the whole run.
                errors.append(f"Invalid size for {file_path}: {size_str!r}")
        entries[file_path] = {'hash': hash_str, 'size': size}
    return entries


def _check_entry(file_path, record_entry, file_data):
    """Return size/hash error messages for one file listed in RECORD."""
    problems = []

    expected_size = record_entry['size']
    actual_size = len(file_data)
    if expected_size is not None and actual_size != expected_size:
        problems.append(
            f"Size mismatch for {file_path}: "
            f"RECORD says {expected_size}, actual is {actual_size}"
        )

    recorded_hash = record_entry['hash']
    if not recorded_hash:
        problems.append(f"No hash recorded for {file_path}")
        return problems

    # RECORD hashes have the form "<algorithm>=<urlsafe-base64 digest>".
    algo, separator, expected_hash = recorded_hash.partition('=')
    if not separator:
        problems.append(f"Invalid hash format for {file_path}: {recorded_hash}")
    elif algo != 'sha256':
        problems.append(f"Unknown hash algorithm {algo} for {file_path} (expected sha256)")
    else:
        actual_hash = compute_hash(file_data)
        if actual_hash != expected_hash:
            problems.append(
                f"Hash mismatch for {file_path}: "
                f"RECORD says {expected_hash}, actual is {actual_hash}"
            )
    return problems


def validate_wheel(wheel_path):
    """Validate that a wheel's contents match its RECORD file.

    Args:
        wheel_path: Path (or binary file object) of the wheel to inspect.

    Returns:
        A list of human-readable error strings; empty when the wheel is
        consistent with its RECORD.

    Raises:
        zipfile.BadZipFile: If *wheel_path* is not a readable zip archive.
    """
    errors = []

    with zipfile.ZipFile(wheel_path, 'r') as wheel:
        names = wheel.namelist()

        record_path = _find_record_path(names)
        if not record_path:
            errors.append("No RECORD file found in wheel")
            return errors

        record_text = wheel.read(record_path).decode('utf-8')
        record_entries = _parse_record(record_text, errors)

        # Directory entries end with "/" and are never listed in RECORD.
        wheel_files = {name for name in names if not name.endswith('/')}
        record_files = set(record_entries)

        # Signature files are added after RECORD is generated, so the wheel
        # spec says they are not listed in RECORD.
        signature_files = {record_path + '.jws', record_path + '.p7s'}

        for f in sorted(wheel_files - record_files - signature_files):
            errors.append(f"File in wheel but not in RECORD: {f}")

        for f in sorted(record_files - wheel_files):
            errors.append(f"File in RECORD but not in wheel: {f}")

        # Sort so the error order is stable between runs.
        for file_path in sorted(record_files & wheel_files):
            # RECORD cannot contain its own hash, so it is not checked.
            if file_path == record_path:
                continue
            errors.extend(
                _check_entry(file_path, record_entries[file_path], wheel.read(file_path))
            )

    return errors
119+
120+
121+
def main(argv=None):
    """Command-line entry point: validate one wheel and report the result.

    Args:
        argv: Argument list to parse; ``None`` (the default) makes argparse
            read ``sys.argv[1:]``.

    Returns:
        0 when the wheel passes validation, 1 when the wheel is missing,
        is not a valid zip archive, or fails validation.
    """
    parser = argparse.ArgumentParser(description="Validate wheel RECORD file matches contents")
    parser.add_argument("wheel", help="Path to wheel file to validate")

    args = parser.parse_args(argv)

    wheel_path = Path(args.wheel)
    # is_file() also rejects directories, which cannot be opened as a zip.
    if not wheel_path.is_file():
        print(f"Error: Wheel file not found: {wheel_path}", file=sys.stderr)
        return 1

    print(f"Validating {wheel_path.name}...")
    try:
        errors = validate_wheel(wheel_path)
    except zipfile.BadZipFile as exc:
        # A corrupt archive is a validation failure, not a crash.
        print(f"Error: Not a valid wheel archive: {wheel_path} ({exc})", file=sys.stderr)
        return 1

    if errors:
        print(f"\n❌ Found {len(errors)} error(s):", file=sys.stderr)
        for error in errors:
            print(f" - {error}", file=sys.stderr)
        return 1

    print("✅ Wheel validation passed!")
    return 0
143+
144+
145+
# Run the CLI only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())

0 commit comments

Comments
 (0)