Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

COM-12464 - MPEG-4 AAC mp4 #219

Merged
merged 1 commit into from
Dec 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions fluster/decoders/iso_mpeg4_aac.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@
# You should have received a copy of the GNU Lesser General Public
# License along with this library. If not, see <https://www.gnu.org/licenses/>.

import glob
import os

from fluster.codec import Codec, OutputFormat
from fluster.decoder import Decoder, register_decoder
from fluster.utils import file_checksum, run_command
Expand Down Expand Up @@ -48,4 +51,15 @@ def decode(
timeout=timeout,
verbose=verbose,
)
base_output = output_filepath[:-4]
pcm_out_f00_file = f"{base_output}_f00.pcm"

if os.path.exists(pcm_out_f00_file):
return file_checksum(pcm_out_f00_file)

output_files = glob.glob(f"{base_output}_f[0-9][0-9].pcm")

for pcm_file in output_files:
return file_checksum(pcm_file)

return file_checksum(output_filepath)
182 changes: 138 additions & 44 deletions scripts/gen_aac.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from html.parser import HTMLParser
from multiprocessing import Pool
import os
import subprocess
import sys
import urllib.request
import multiprocessing
Expand All @@ -44,15 +45,16 @@

URL_MPEG4 = BASE_URL + "ittf/PubliclyAvailableStandards/ISO_IEC_14496-26_2010_Bitstreams/"
URL_MPEG4_ADIF = URL_MPEG4 + "DVD1/mpeg4audio-conformance/compressedAdif/add-opt/"
URL_MPEG4_WAV_REFS = URL_MPEG4 + "DVD2/mpeg4audio-conformance/referencesWav/"
URL_MPEG4_MP4 = URL_MPEG4 + "DVD1/mpeg4audio-conformance/compressedMp4/"
URL_MPEG4_WAV_REFS_DVD2 = URL_MPEG4 + "DVD2/mpeg4audio-conformance/referencesWav/"
URL_MPEG4_WAV_REFS_DVD3 = URL_MPEG4 + "DVD3/mpeg4audio-conformance/referencesWav/"
URL_MPEG4_WAV_REFS_MD5 = URL_MPEG4 + "DVD1/mpeg4audio-conformance/referencesWav/_checksum/"

BITSTREAM_EXTS = [".adts", ".adif"]
BITSTREAM_EXTS = [".adts", ".adif", ".mp4"]
MD5_EXTS = [".wav.md5sum"]
MD5_EXCLUDES = []
RAW_EXTS = [".wav"]


class HREFParser(HTMLParser):
"""Custom parser to find href links"""

Expand Down Expand Up @@ -108,13 +110,21 @@ def _callback_error(err):
downloads = []

print(f"\tDownloading output reference files for test suite {self.suite_name}")
# This regular expression matches the suffix variations found in raw and checksum filenames
# (e.g. names ending with _f00, _level64 or _boost1), so that those files can be downloaded
# into the same folder as the corresponding compressed bitstream.
regex = r"(_[a-zA-Z][0-9][0-9]$)|(_level[0-9]+$)|(_boost[0-9]+$)"
mdimopoulos marked this conversation as resolved.
Show resolved Hide resolved

for link in raw_bitstream_links:
file_name = os.path.basename(link)
base_name = file_name.split('.')[0]
main_prefix = "_".join(base_name.split('_')[:2])

directory = os.path.join(test_suite.resources_dir, test_suite.name, main_prefix)
if re.search(regex, base_name):
main_prefix = "_".join(base_name.split('_')[:2])
directory = os.path.join(test_suite.resources_dir, test_suite.name, main_prefix)
else:
directory = os.path.join(test_suite.resources_dir, test_suite.name, base_name)

if not os.path.exists(directory):
os.makedirs(directory)

Expand All @@ -131,9 +141,13 @@ def _callback_error(err):
for link in raw_bitstream_md5_links:
file_name = os.path.basename(link)
base_name = file_name.split('.')[0]
main_prefix = "_".join(base_name.split('_')[:2])

directory = os.path.join(test_suite.resources_dir, test_suite.name, main_prefix)
if re.search(regex, base_name):
main_prefix = "_".join(base_name.split('_')[:2])
directory = os.path.join(test_suite.resources_dir, test_suite.name, main_prefix)
else:
directory = os.path.join(test_suite.resources_dir, test_suite.name, base_name)

if not os.path.exists(directory):
os.makedirs(directory)

Expand Down Expand Up @@ -168,49 +182,135 @@ def generate(self, download, jobs):
dict(),
)

hparser = HREFParser()
hparser_compressed = HREFParser()
hparser_raw = HREFParser()
hparser_raw_checksums = HREFParser()

with urllib.request.urlopen(self.url_test_vectors) as resp:
data = str(resp.read())
hparser.feed(data)
compressed_bitstream_links = [url for url in hparser.links if url.endswith(tuple(BITSTREAM_EXTS))]
hparser_compressed.feed(data)
compressed_bitstream_links = [url for url in hparser_compressed.links if url.endswith(tuple(BITSTREAM_EXTS))]

# Download compressed bitstream links
for source_url in compressed_bitstream_links:
input_filename = os.path.basename(source_url)
test_vector_name = os.path.splitext(input_filename)[0]
test_vector = TestVector(
test_vector_name, source_url, "__skip__", input_filename, OutputFormat.UNKNOWN, ""
)
test_suite.test_vectors[test_vector_name] = test_vector

print(f"Download list of compressed bitstreams from {self.url_test_vectors}")
if download:
test_suite.download(
jobs=jobs,
out_dir=test_suite.resources_dir,
verify=False,
extract_all=True,
keep_file=True,
)

# MPEG4_AAC-MP4 test suite
if test_suite.name == "MPEG4_AAC-MP4":
print (f"Identifying MP4 files that contain audio in test suite: {self.suite_name}")

# Validating audio files using ffprobe
ffprobe = utils.normalize_binary_cmd("ffprobe")
non_audio_test_vectors=[]
for test_vector in test_suite.test_vectors.values():
dest_dir = os.path.join(test_suite.resources_dir, test_suite.name, test_vector.name)
absolute_path = os.path.join(os.getcwd(), dest_dir, test_vector.input_file)
command = [
mdimopoulos marked this conversation as resolved.
Show resolved Hide resolved
ffprobe,
"-loglevel",
"error",
"-show_entries",
"stream=codec_name",
"-of",
"csv=p=0",
absolute_path
]
result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)

# If ffprobe fails, add the test vector to the list of vectors to be removed from the test suite
if result.returncode != 0:
non_audio_test_vectors.append(test_vector.name)
else:
for line in result.stdout.split():
if line and "aac" not in line:
non_audio_test_vectors.append(test_vector.name)
break

# Remove the test vectors that correspond to non-audio files
if non_audio_test_vectors:
print("Removing non-audio files and folders from hard drive")
for name in non_audio_test_vectors:

# Removing files and folders from hard drive
dest_dir = os.path.join(test_suite.resources_dir, test_suite.name, name)
absolute_path = os.path.join(os.getcwd(), dest_dir, name + ".mp4")
absolute_path_folder = os.path.join(os.getcwd(), dest_dir)

if os.path.exists(absolute_path):
try:
os.remove(absolute_path)
except OSError as error:
raise Exception(f"The file {absolute_path} couldn't be deleted.\n{error}")
try:
os.rmdir(absolute_path_folder)
except OSError as error:
raise Exception(f"The folder {absolute_path_folder} couldn't be deleted.\n{error}")

# Remove test vectors from test suite and the corresponding links
del(test_suite.test_vectors[str(name)])

# Rewrite compressed bitstream link list
compressed_bitstream_links[:] = [
link for link in compressed_bitstream_links if os.path.splitext(os.path.basename(link))[0] != name
]

compressed_bitstream_names = [os.path.splitext(os.path.basename(x))[0] for x in compressed_bitstream_links]

with urllib.request.urlopen(self.url_reference_vectors) as resp:
data = str(resp.read())
hparser.feed(data)
raw_bitstream_links = [url for url in hparser.links if url.endswith(tuple(RAW_EXTS))]
hparser_raw.feed(data)
raw_bitstream_links = [url for url in hparser_raw.links if url.endswith(tuple(RAW_EXTS))]

raw_bitstream_names = [
os.path.splitext(os.path.basename(x))[0].split('_f')[0] for x in raw_bitstream_links
]
# The reference files for the MPEG4_AAC-MP4 test suite are split across two DVDs
if test_suite.name == "MPEG4_AAC-MP4":
hparser_raw_extra = HREFParser()

# Get the DVD3 wav files
with urllib.request.urlopen(URL_MPEG4_WAV_REFS_DVD3) as resp:
data = str(resp.read())
hparser_raw_extra.feed(data)
raw_extra_bitstream_links = [url for url in hparser_raw_extra.links if url.endswith(tuple(RAW_EXTS))]

# Adding the DVD3 wav files to the rest of the files
raw_bitstream_links = raw_bitstream_links + raw_extra_bitstream_links

raw_bitstream_names = [os.path.splitext(os.path.basename(x))[0].split('_f')[0] for x in raw_bitstream_links]

missing_files = [x for x in set(compressed_bitstream_names).difference(raw_bitstream_names)]
if missing_files:
print(f"Missing reference files: {missing_files}")
for missing_file in missing_files:
print(f"Skipping test vector {missing_file}, as the reference file is missing.")

raw_bitstream_names = [name for name in compressed_bitstream_names if name not in missing_files]

# Match and store entries of raw_bitstream_links that contain entries of raw_bitstream_names as substrings
raw_bitstream_links = [
link for link in raw_bitstream_links if any(name in link for name in raw_bitstream_names)
]
raw_bitstream_links = [link for link in raw_bitstream_links if any(name in link for name in raw_bitstream_names)]

with urllib.request.urlopen(self.url_reference_vectors_checksums) as resp:
data = str(resp.read())
hparser.feed(data)
raw_bitstream_md5_links = [url for url in hparser.links if url.endswith(tuple(MD5_EXTS))]

hparser_raw_checksums.feed(data)
raw_bitstream_md5_links = [url for url in hparser_raw_checksums.links if url.endswith(tuple(MD5_EXTS))]
raw_bitstream_md5_names = [
os.path.splitext(os.path.splitext(os.path.basename(x))[0].split('_f')[0])[0] for x in
raw_bitstream_md5_links
os.path.splitext(os.path.splitext(os.path.basename(x))[0].split('_f')[0])[0] for x in raw_bitstream_md5_links
]

missing_checksum_files = [x for x in set(compressed_bitstream_names).difference(raw_bitstream_md5_names)]
if missing_checksum_files:
print(f"Missing reference checksum files: {missing_checksum_files}")
for missing_checksum in missing_checksum_files:
print(f"Skipping checksum for {missing_checksum}, as the reference file is missing.")

Expand All @@ -222,24 +322,6 @@ def generate(self, download, jobs):
link for link in raw_bitstream_md5_links if any(name in link for name in raw_bitstream_md5_names)
]

for source_url in compressed_bitstream_links:
input_filename = os.path.basename(source_url)
test_vector_name = os.path.splitext(input_filename)[0]
test_vector = TestVector(
test_vector_name, source_url, "__skip__", input_filename, OutputFormat.UNKNOWN, ""
)
test_suite.test_vectors[test_vector_name] = test_vector

print(f"Download list of compressed bitstreams from {self.url_test_vectors}")
if download:
test_suite.download(
jobs=jobs,
out_dir=test_suite.resources_dir,
verify=False,
extract_all=True,
keep_file=True,
)

# Download test suite output reference and md5 checksum files
self._download_raw_output_references_and_checksums(jobs, test_suite, raw_bitstream_links,
raw_bitstream_md5_links)
Expand Down Expand Up @@ -381,7 +463,19 @@ def _fill_checksum_aac(test_vector, dest_dir):
Codec.AAC,
"ISO IEC 14496-26 MPEG4 AAC ADIF test suite",
URL_MPEG4_ADIF,
URL_MPEG4_WAV_REFS,
URL_MPEG4_WAV_REFS_DVD2,
URL_MPEG4_WAV_REFS_MD5,
False,
)
generator.generate(not args.skip_download, args.jobs)

generator = AACGenerator(
"MPEG4_AAC-MP4",
"MPEG4_AAC-MP4",
Codec.AAC,
"ISO IEC 14496-26 MPEG4 AAC MP4 test suite",
URL_MPEG4_MP4,
URL_MPEG4_WAV_REFS_DVD2,
URL_MPEG4_WAV_REFS_MD5,
False,
)
Expand Down
Loading