Skip to content

Commit

Permalink
COM-12646: Remove .wav and .md5 code from AAC test suite generator
Browse files Browse the repository at this point in the history
  • Loading branch information
mdimopoulos committed Dec 12, 2024
1 parent c994fdd commit 44bf1a3
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 231 deletions.
8 changes: 6 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,10 @@ h264_reference_decoder: ## build H.264 reference decoder
find $(CONTRIB_DIR)/JM/bin/umake -name "ldecod" -type f -exec cp {} $(DECODERS_DIR)/ \;

mpeg_2_aac_reference_decoder: ## build ISO MPEG2 AAC reference decoder
if ! dpkg -l | grep gcc-multilib -c >>/dev/null; then sudo apt-get install gcc-multilib; fi
if ! dpkg -l | grep g++-multilib -c >>/dev/null; then sudo apt-get install g++-multilib; fi
ifeq ($(KERNEL_NAME), Linux)
if ! dpkg -l | grep gcc-multilib -c >>/dev/null; then sudo apt-get install gcc-multilib; fi && true
if ! dpkg -l | grep g++-multilib -c >>/dev/null; then sudo apt-get install g++-multilib; fi && true
endif
if [ ! $(wildcard /usr/include/asm) ] && [ $(wildcard /usr/include/asm-generic) ]; then sudo ln -s /usr/include/asm-generic /usr/include/asm; fi

ifeq ($(wildcard $(CONTRIB_DIR)/C039486_Electronic_inserts),)
Expand Down Expand Up @@ -147,8 +149,10 @@ endif
sudo rm -f /usr/include/asm

mpeg_4_aac_reference_decoder: ## build ISO MPEG4 AAC reference decoder
ifeq ($(KERNEL_NAME), Linux)
if ! dpkg -l | grep gcc-multilib -c >>/dev/null; then sudo apt-get install gcc-multilib; fi
if ! dpkg -l | grep g++-multilib -c >>/dev/null; then sudo apt-get install g++-multilib; fi
endif
if [ ! $(wildcard /usr/include/asm) ] && [ $(wildcard /usr/include/asm-generic) ]; then sudo ln -s /usr/include/asm-generic /usr/include/asm; fi

ifeq ($(wildcard $(CONTRIB_DIR)/C050470e_Electronic_inserts), )
Expand Down
241 changes: 12 additions & 229 deletions scripts/gen_aac.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

# Fluster - testing framework for decoders conformance
# Copyright (C) 2024, Fluendo, S.A.
# Author: Michalis Dimopoulod <mdimopoulos@fluendo.com>, Fluendo, S.A.
# Author: Michalis Dimopoulos <mdimopoulos@fluendo.com>, Fluendo, S.A.
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public License
Expand All @@ -20,12 +20,10 @@
import argparse
import multiprocessing
import os
import re
import subprocess
import sys
import urllib.request
from html.parser import HTMLParser
from multiprocessing import Pool
from typing import Any, List, Optional, Tuple

sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
Expand All @@ -39,20 +37,12 @@
URL_MPEG2 = BASE_URL + "ittf/PubliclyAvailableStandards/ISO_IEC_13818-4_2004_Conformance_Testing/AAC/"
URL_MPEG2_ADTS = URL_MPEG2 + "compressedAdts"
URL_MPEG2_ADIF = URL_MPEG2 + "compressedAdif"
URL_MPEG2_WAV_REFS = URL_MPEG2 + "referencesWav"
URL_MPEG2_WAV_REFS_MD5 = URL_MPEG2 + "referencesWav/_checksum"

URL_MPEG4 = BASE_URL + "ittf/PubliclyAvailableStandards/ISO_IEC_14496-26_2010_Bitstreams/"
URL_MPEG4_ADIF = URL_MPEG4 + "DVD1/mpeg4audio-conformance/compressedAdif/add-opt/"
URL_MPEG4_MP4 = URL_MPEG4 + "DVD1/mpeg4audio-conformance/compressedMp4/"
URL_MPEG4_WAV_REFS_DVD2 = URL_MPEG4 + "DVD2/mpeg4audio-conformance/referencesWav/"
URL_MPEG4_WAV_REFS_DVD3 = URL_MPEG4 + "DVD3/mpeg4audio-conformance/referencesWav/"
URL_MPEG4_WAV_REFS_MD5 = URL_MPEG4 + "DVD1/mpeg4audio-conformance/referencesWav/_checksum/"

BITSTREAM_EXTS = [".adts", ".adif", ".mp4"]
MD5_EXTS = [".wav.md5sum"]
MD5_EXCLUDES: List[str] = []
RAW_EXTS = [".wav"]


class HREFParser(HTMLParser):
Expand Down Expand Up @@ -86,92 +76,15 @@ def __init__(
codec: Codec,
description: str,
url_test_vectors: str,
url_reference_vectors: str,
url_reference_vectors_checksums: str,
use_ffprobe: bool = False,
):
self.name = name
self.suite_name = suite_name
self.codec = codec
self.description = description
self.url_test_vectors = url_test_vectors
self.url_reference_vectors = url_reference_vectors
self.url_reference_vectors_checksums = url_reference_vectors_checksums
self.use_ffprobe = use_ffprobe

def _download_raw_output_references_and_checksums(
    self, jobs: int, test_suite: TestSuite, raw_bitstream_links: List[str], raw_bitstream_md5_links: List[str]
) -> None:
    """Download the raw output reference files and their checksum files.

    Every link is fetched with ``utils.download`` through a pool of ``jobs``
    worker processes. Each file is stored under
    ``<resources_dir>/<test suite name>/<vector directory>``, where the vector
    directory is derived from the file name of the link (see
    ``_target_directory`` below).

    Args:
        jobs: Number of worker processes used for the downloads.
        test_suite: Test suite whose ``resources_dir`` and ``name`` select the
            destination root.
        raw_bitstream_links: URLs of the raw output reference files.
        raw_bitstream_md5_links: URLs of the checksum files of those references.

    Exits the process through ``sys.exit`` when any download fails or never
    completes.
    """
    # Raw and checksum file names come in several variations; this pattern
    # catches the ones carrying a trailing variant suffix so that they are
    # stored in the same folder as the compressed bitstream,
    # e.g. files ending with: _f00, _level64 or _boost1
    variant_suffix = re.compile(r"(_[a-zA-Z][0-9][0-9]$)|(_level[0-9]+$)|(_boost[0-9]+$)")
    suite_dir = os.path.join(test_suite.resources_dir, test_suite.name)

    def _target_directory(link: str) -> str:
        """Return the directory in which the file behind ``link`` is stored."""
        base_name = os.path.basename(link).split(".")[0]
        if variant_suffix.search(base_name):
            # Only the first two "_"-separated components name the vector.
            return os.path.join(suite_dir, "_".join(base_name.split("_")[:2]))
        return os.path.join(suite_dir, base_name)

    # Both kinds of links are scheduled the same way; only the progress
    # message differs.
    batches = (
        (f"\tDownloading output reference files for test suite {self.suite_name}", raw_bitstream_links),
        (
            f"\tDownloading output reference checksum files for test suite {self.suite_name}",
            raw_bitstream_md5_links,
        ),
    )

    with Pool(jobs) as pool:

        def _callback_error(err: Any) -> None:
            print(f"\nError downloading -> {err}\n")
            pool.terminate()

        downloads = []

        for message, links in batches:
            print(message)
            for link in links:
                directory = _target_directory(link)
                # exist_ok avoids the check-then-create race between vectors
                # that share a directory.
                os.makedirs(directory, exist_ok=True)
                downloads.append(
                    pool.apply_async(
                        utils.download,
                        args=(link, directory),
                        error_callback=_callback_error,
                    )
                )

        pool.close()
        pool.join()

        for job in downloads:
            try:
                # Once the pool has been joined no further results are
                # delivered. A job that is still pending here (the pool was
                # terminated by the error callback) would make get() block
                # forever, so treat it as a failure instead.
                if not job.ready():
                    raise ValueError("Download task did not complete")
                job.get()
                if not job.successful():
                    raise ValueError("Download task was not successful")
            except Exception as e:
                sys.exit(f"Some download failed: {e}")

def generate(self, download: bool, jobs: int) -> None:
"""Generates the test suite and saves it to a file"""
output_filepath = os.path.join(self.suite_name + ".json")
Expand All @@ -185,8 +98,6 @@ def generate(self, download: bool, jobs: int) -> None:
)

hparser_compressed = HREFParser()
hparser_raw = HREFParser()
hparser_raw_checksums = HREFParser()

with urllib.request.urlopen(self.url_test_vectors) as resp:
data = str(resp.read())
Expand Down Expand Up @@ -263,74 +174,6 @@ def generate(self, download: bool, jobs: int) -> None:
# Remove test vectors from test suite and the corresponding links
del test_suite.test_vectors[str(name)]

# Rewrite compressed bitstream link list
compressed_bitstream_links[:] = [
link
for link in compressed_bitstream_links
if os.path.splitext(os.path.basename(link))[0] != name
]

compressed_bitstream_names = [os.path.splitext(os.path.basename(x))[0] for x in compressed_bitstream_links]

with urllib.request.urlopen(self.url_reference_vectors) as resp:
data = str(resp.read())
hparser_raw.feed(data)
raw_bitstream_links = [url for url in hparser_raw.links if url.endswith(tuple(RAW_EXTS))]

# The reference files are divided in two DVDs for MPEG4_AAC-MP4 test suite
if test_suite.name == "MPEG4_AAC-MP4":
hparser_raw_extra = HREFParser()

# Get the DVD3 wav files
with urllib.request.urlopen(URL_MPEG4_WAV_REFS_DVD3) as resp:
data = str(resp.read())
hparser_raw_extra.feed(data)
raw_extra_bitstream_links = [url for url in hparser_raw_extra.links if url.endswith(tuple(RAW_EXTS))]

# Adding the DVD3 wav files to the rest of the files
raw_bitstream_links = raw_bitstream_links + raw_extra_bitstream_links

raw_bitstream_names = [os.path.splitext(os.path.basename(x))[0].split("_f")[0] for x in raw_bitstream_links]

missing_files = list(set(compressed_bitstream_names).difference(raw_bitstream_names))
if missing_files:
for missing_file in missing_files:
print(f"Skipping test vector {missing_file}, as the reference file is missing.")

raw_bitstream_names = [name for name in compressed_bitstream_names if name not in missing_files]

# Match and store entries of raw_bitstream_links that contain entries of raw_bitstream_names as substrings
raw_bitstream_links = [
link for link in raw_bitstream_links if any(name in link for name in raw_bitstream_names)
]

with urllib.request.urlopen(self.url_reference_vectors_checksums) as resp:
data = str(resp.read())
hparser_raw_checksums.feed(data)
raw_bitstream_md5_links = [url for url in hparser_raw_checksums.links if url.endswith(tuple(MD5_EXTS))]
raw_bitstream_md5_names = [
os.path.splitext(os.path.splitext(os.path.basename(x))[0].split("_f")[0])[0]
for x in raw_bitstream_md5_links
]

missing_checksum_files = list(set(compressed_bitstream_names).difference(raw_bitstream_md5_names))
if missing_checksum_files:
for missing_checksum in missing_checksum_files:
print(f"Skipping checksum for {missing_checksum}, as the reference file is missing.")

raw_bitstream_md5_names = [name for name in compressed_bitstream_names if name not in missing_checksum_files]

# Match and store entries of raw_bitstream_md5_links that contain entries of raw_bitstream_md5_names
# as substrings
raw_bitstream_md5_links = [
link for link in raw_bitstream_md5_links if any(name in link for name in raw_bitstream_md5_names)
]

# Download test suite output reference and md5 checksum files
self._download_raw_output_references_and_checksums(
jobs, test_suite, raw_bitstream_links, raw_bitstream_md5_links
)

for test_vector in test_suite.test_vectors.values():
dest_dir = os.path.join(test_suite.resources_dir, test_suite.name, test_vector.name)
dest_path = os.path.join(dest_dir, os.path.basename(test_vector.source))
Expand Down Expand Up @@ -365,61 +208,9 @@ def generate(self, download: bool, jobs: int) -> None:
except KeyError as key_err:
raise key_err

# Read or calculate checksum of expected raw output
if test_vector.name not in missing_checksum_files:
self._fill_checksum_aac(test_vector, dest_dir)

test_suite.to_json_file(output_filepath)
print("Generate new test suite: " + test_suite.name + ".json")

@staticmethod
def _fill_checksum_aac(test_vector: TestVector, dest_dir: str) -> None:
    """Fill ``test_vector.result`` with the checksum of its raw reference output.

    The raw reference is searched in ``dest_dir`` as ``<name><ext>`` and, if
    that does not exist, as ``<name>_f00<ext>``, for every extension in
    ``RAW_EXTS``. A checksum file (one of ``MD5_EXTS``) must also be present;
    the first hash found in it is stored in ``test_vector.result`` and
    validated. Finally ``test_vector.result`` is set to the value returned by
    ``utils.file_checksum`` for the raw reference.

    Args:
        test_vector: Test vector to update; its ``name`` selects the files.
        dest_dir: Directory holding the downloaded reference and checksum files.

    Raises:
        Exception: If no raw reference file or no checksum file is found.
        AssertionError: If the value extracted from the checksum file is not a
            32-digit hexadecimal MD5.
    """
    base_name = test_vector.name

    # Exact names take precedence over the "_f00" variants, for every extension.
    candidates = (
        os.path.join(dest_dir, base_name + suffix + ext) for suffix in ("", "_f00") for ext in RAW_EXTS
    )
    raw_file = next((path for path in candidates if os.path.exists(path)), None)
    if raw_file is None:
        # The message reports the last extension tried; guard against an empty
        # RAW_EXTS so that building the message itself cannot fail.
        last_ext = RAW_EXTS[-1] if RAW_EXTS else ""
        raise Exception(
            f"Neither {base_name + last_ext} nor {base_name + '_f00' + last_ext} found with extensions {RAW_EXTS} "
            f"in {dest_dir}"
        )

    checksum_file = utils.find_by_ext(dest_dir, MD5_EXTS)
    if checksum_file is None:
        raise Exception("MD5 not found")

    md5_line = re.compile(r"([a-fA-F0-9]{32,}).*(?:\.(wav))?")
    with open(checksum_file, "r") as checksum_fh:
        # Skip empty lines and take the first line that starts with a hash.
        stripped_lines = (line.strip() for line in checksum_fh)
        match = next(filter(None, (md5_line.match(line) for line in stripped_lines if line)), None)
    if match:
        test_vector.result = match.group(1).lower()

    # Check that a valid MD5 was extracted from the file. Raised explicitly
    # (instead of an assert statement) so that the check survives "python -O".
    if re.fullmatch(r"[a-fA-F0-9]{32}", test_vector.result) is None:
        raise AssertionError(f"{test_vector.result} is not a valid MD5 hash")

    # NOTE(review): this replaces the hash just read from the checksum file
    # with one computed from the raw file, so the checksum file only acts as
    # a presence/format check. Confirm that this is intended.
    test_vector.result = utils.file_checksum(raw_file)


if __name__ == "__main__":
parser = argparse.ArgumentParser()
Expand All @@ -438,38 +229,32 @@ def _fill_checksum_aac(test_vector: TestVector, dest_dir: str) -> None:
)
args = parser.parse_args()

generator = AACGenerator(
"MPEG2_AAC-ADTS",
"MPEG2_AAC-ADTS",
Codec.AAC,
"ISO IEC 13818-4 MPEG2 AAC ADTS test suite",
URL_MPEG2_ADTS,
URL_MPEG2_WAV_REFS,
URL_MPEG2_WAV_REFS_MD5,
True,
)
generator.generate(not args.skip_download, args.jobs)

generator = AACGenerator(
"MPEG2_AAC-ADIF",
"MPEG2_AAC-ADIF",
Codec.AAC,
"ISO IEC 13818-4 MPEG2 AAC ADIF test suite",
URL_MPEG2_ADIF,
URL_MPEG2_WAV_REFS,
URL_MPEG2_WAV_REFS_MD5,
False,
)
generator.generate(not args.skip_download, args.jobs)

generator = AACGenerator(
"MPEG2_AAC-ADTS",
"MPEG2_AAC-ADTS",
Codec.AAC,
"ISO IEC 13818-4 MPEG2 AAC ADTS test suite",
URL_MPEG2_ADTS,
True,
)
generator.generate(not args.skip_download, args.jobs)

generator = AACGenerator(
"MPEG4_AAC-ADIF",
"MPEG4_AAC-ADIF",
Codec.AAC,
"ISO IEC 14496-26 MPEG4 AAC ADIF test suite",
URL_MPEG4_ADIF,
URL_MPEG4_WAV_REFS_DVD2,
URL_MPEG4_WAV_REFS_MD5,
False,
)
generator.generate(not args.skip_download, args.jobs)
Expand All @@ -480,8 +265,6 @@ def _fill_checksum_aac(test_vector: TestVector, dest_dir: str) -> None:
Codec.AAC,
"ISO IEC 14496-26 MPEG4 AAC MP4 test suite",
URL_MPEG4_MP4,
URL_MPEG4_WAV_REFS_DVD2,
URL_MPEG4_WAV_REFS_MD5,
False,
True,
)
generator.generate(not args.skip_download, args.jobs)

0 comments on commit 44bf1a3

Please sign in to comment.