fluendo · mcesariniflu · Dec 9, 2024 · Dec 2, 2024
diff --git a/fluster/decoders/iso_mpeg4_aac.py b/fluster/decoders/iso_mpeg4_aac.py
@@ -15,6 +15,9 @@
 # You should have received a copy of the GNU Lesser General Public
 # License along with this library. If not, see <https://www.gnu.org/licenses/>.
 
+import glob
+import os
+
 from fluster.codec import Codec, OutputFormat
 from fluster.decoder import Decoder, register_decoder
 from fluster.utils import file_checksum, run_command
@@ -48,4 +51,15 @@ def decode(
             timeout=timeout,
             verbose=verbose,
         )
+        base_output = output_filepath[:-4]
+        pcm_out_f00_file = f"{base_output}_f00.pcm"
+
+        if os.path.exists(pcm_out_f00_file):
+            return file_checksum(pcm_out_f00_file)
+
+        output_files = glob.glob(f"{base_output}_f[0-9][0-9].pcm")
+
+        for pcm_file in output_files:
+            return file_checksum(pcm_file)
+
         return file_checksum(output_filepath)
diff --git a/scripts/gen_aac.py b/scripts/gen_aac.py
@@ -22,6 +22,7 @@
 from html.parser import HTMLParser
 from multiprocessing import Pool
 import os
+import subprocess
 import sys
 import urllib.request
 import multiprocessing
@@ -44,15 +45,16 @@
 
 URL_MPEG4 = BASE_URL + "ittf/PubliclyAvailableStandards/ISO_IEC_14496-26_2010_Bitstreams/"
 URL_MPEG4_ADIF = URL_MPEG4 + "DVD1/mpeg4audio-conformance/compressedAdif/add-opt/"
-URL_MPEG4_WAV_REFS = URL_MPEG4 + "DVD2/mpeg4audio-conformance/referencesWav/"
+URL_MPEG4_MP4 = URL_MPEG4 + "DVD1/mpeg4audio-conformance/compressedMp4/"
+URL_MPEG4_WAV_REFS_DVD2 = URL_MPEG4 + "DVD2/mpeg4audio-conformance/referencesWav/"
+URL_MPEG4_WAV_REFS_DVD3 = URL_MPEG4 + "DVD3/mpeg4audio-conformance/referencesWav/"
 URL_MPEG4_WAV_REFS_MD5 = URL_MPEG4 + "DVD1/mpeg4audio-conformance/referencesWav/_checksum/"
 
-BITSTREAM_EXTS = [".adts", ".adif"]
+BITSTREAM_EXTS = [".adts", ".adif", ".mp4"]
 MD5_EXTS = [".wav.md5sum"]
 MD5_EXCLUDES = []
 RAW_EXTS = [".wav"]
 
-
 class HREFParser(HTMLParser):
     """Custom parser to find href links"""
 
@@ -108,13 +110,21 @@ def _callback_error(err):
             downloads = []
 
             print(f"\tDownloading output reference files for test suite {self.suite_name}")
+            # This regular expression catches the different variations of raw and checksum filenames,
+            # so that they can be downloaded into the same folder as the compressed bitstream.
+            # e.g. files ending with: _f00, _level64 or _boost1
+            regex = r"(_[a-zA-Z][0-9][0-9]$)|(_level[0-9]+$)|(_boost[0-9]+$)"
 
             for link in raw_bitstream_links:
                 file_name = os.path.basename(link)
                 base_name = file_name.split('.')[0]
-                main_prefix = "_".join(base_name.split('_')[:2])
 
-                directory = os.path.join(test_suite.resources_dir, test_suite.name, main_prefix)
+                if re.search(regex, base_name):
+                    main_prefix = "_".join(base_name.split('_')[:2])
+                    directory = os.path.join(test_suite.resources_dir, test_suite.name, main_prefix)
+                else:
+                    directory = os.path.join(test_suite.resources_dir, test_suite.name, base_name)
+
                 if not os.path.exists(directory):
                     os.makedirs(directory)
 
@@ -131,9 +141,13 @@ def _callback_error(err):
             for link in raw_bitstream_md5_links:
                 file_name = os.path.basename(link)
                 base_name = file_name.split('.')[0]
-                main_prefix = "_".join(base_name.split('_')[:2])
 
-                directory = os.path.join(test_suite.resources_dir, test_suite.name, main_prefix)
+                if re.search(regex, base_name):
+                    main_prefix = "_".join(base_name.split('_')[:2])
+                    directory = os.path.join(test_suite.resources_dir, test_suite.name, main_prefix)
+                else:
+                    directory = os.path.join(test_suite.resources_dir, test_suite.name, base_name)
+
                 if not os.path.exists(directory):
                     os.makedirs(directory)
 
@@ -168,49 +182,135 @@ def generate(self, download, jobs):
             dict(),
         )
 
-        hparser = HREFParser()
+        hparser_compressed = HREFParser()
+        hparser_raw = HREFParser()
+        hparser_raw_checksums = HREFParser()
 
         with urllib.request.urlopen(self.url_test_vectors) as resp:
             data = str(resp.read())
-            hparser.feed(data)
-        compressed_bitstream_links = [url for url in hparser.links if url.endswith(tuple(BITSTREAM_EXTS))]
+            hparser_compressed.feed(data)
+        compressed_bitstream_links = [url for url in hparser_compressed.links if url.endswith(tuple(BITSTREAM_EXTS))]
+
+        # Download compressed bitstream links
+        for source_url in compressed_bitstream_links:
+            input_filename = os.path.basename(source_url)
+            test_vector_name = os.path.splitext(input_filename)[0]
+            test_vector = TestVector(
+                test_vector_name, source_url, "__skip__", input_filename, OutputFormat.UNKNOWN, ""
+            )
+            test_suite.test_vectors[test_vector_name] = test_vector
+
+        print(f"Download list of compressed bitstreams from {self.url_test_vectors}")
+        if download:
+            test_suite.download(
+                jobs=jobs,
+                out_dir=test_suite.resources_dir,
+                verify=False,
+                extract_all=True,
+                keep_file=True,
+            )
+
+        # MPEG4_AAC-MP4 test suite
+        if test_suite.name == "MPEG4_AAC-MP4":
+            print (f"Identifying MP4 files that contain audio in test suite: {self.suite_name}")
+
+            # Validating audio files using ffprobe
+            ffprobe = utils.normalize_binary_cmd("ffprobe")
+            non_audio_test_vectors=[]
+            for test_vector in test_suite.test_vectors.values():
+                dest_dir = os.path.join(test_suite.resources_dir, test_suite.name, test_vector.name)
+                absolute_path = os.path.join(os.getcwd(), dest_dir, test_vector.input_file)
+                command = [
+                    ffprobe,
+                    "-loglevel",
+                    "error",
+                    "-show_entries",
+                    "stream=codec_name",
+                    "-of",
+                    "csv=p=0",
+                    absolute_path
+                ]
+                result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
+
+                # If ffprobe fails, add the test vector to the list of vectors to be removed from the test suite
+                if result.returncode != 0:
+                    non_audio_test_vectors.append(test_vector.name)
+                else:
+                    for line in result.stdout.split():
+                        if line and "aac" not in line:
+                            non_audio_test_vectors.append(test_vector.name)
+                            break
+
+            # Removing non audio files test vectors
+            if non_audio_test_vectors:
+                print("Removing non-audio files and folders from hard drive")
+                for name in non_audio_test_vectors:
+
+                    # Removing files and folders from hard drive
+                    dest_dir = os.path.join(test_suite.resources_dir, test_suite.name, name)
+                    absolute_path = os.path.join(os.getcwd(), dest_dir, name + ".mp4")
+                    absolute_path_folder = os.path.join(os.getcwd(), dest_dir)
+
+                    if os.path.exists(absolute_path):
+                        try:
+                            os.remove(absolute_path)
+                        except OSError as error:
+                            raise Exception(f"The file {absolute_path} couldn't be deleted.\n{error}")
+                        try:
+                            os.rmdir(absolute_path_folder)
+                        except OSError as error:
+                            raise Exception(f"The folder {absolute_path_folder} couldn't be deleted.\n{error}")
+
+                    # Remove test vectors from test suite and the corresponding links
+                    del(test_suite.test_vectors[str(name)])
+
+                    # Rewrite compressed bitstream link list
+                    compressed_bitstream_links[:] = [
+                        link for link in compressed_bitstream_links if os.path.splitext(os.path.basename(link))[0] != name
+                    ]
+
         compressed_bitstream_names = [os.path.splitext(os.path.basename(x))[0] for x in compressed_bitstream_links]
 
         with urllib.request.urlopen(self.url_reference_vectors) as resp:
             data = str(resp.read())
-            hparser.feed(data)
-        raw_bitstream_links = [url for url in hparser.links if url.endswith(tuple(RAW_EXTS))]
+            hparser_raw.feed(data)
+        raw_bitstream_links = [url for url in hparser_raw.links if url.endswith(tuple(RAW_EXTS))]
 
-        raw_bitstream_names = [
-            os.path.splitext(os.path.basename(x))[0].split('_f')[0] for x in raw_bitstream_links
-        ]
+        # The reference files for the MPEG4_AAC-MP4 test suite are split across two DVDs (DVD2 and DVD3)
+        if test_suite.name == "MPEG4_AAC-MP4":
+            hparser_raw_extra = HREFParser()
+
+            # Get the DVD3 wav files
+            with urllib.request.urlopen(URL_MPEG4_WAV_REFS_DVD3) as resp:
+                data = str(resp.read())
+                hparser_raw_extra.feed(data)
+            raw_extra_bitstream_links = [url for url in hparser_raw_extra.links if url.endswith(tuple(RAW_EXTS))]
+
+            # Adding the DVD3 wav files to the rest of the files
+            raw_bitstream_links = raw_bitstream_links + raw_extra_bitstream_links
+
+        raw_bitstream_names = [os.path.splitext(os.path.basename(x))[0].split('_f')[0] for x in raw_bitstream_links]
 
         missing_files = [x for x in set(compressed_bitstream_names).difference(raw_bitstream_names)]
         if missing_files:
-            print(f"Missing reference files: {missing_files}")
             for missing_file in missing_files:
                 print(f"Skipping test vector {missing_file}, as the reference file is missing.")
 
         raw_bitstream_names = [name for name in compressed_bitstream_names if name not in missing_files]
 
         # Match and store entries of raw_bitstream_links that contain entries of raw_bitstream_names as substrings
-        raw_bitstream_links = [
-            link for link in raw_bitstream_links if any(name in link for name in raw_bitstream_names)
-        ]
+        raw_bitstream_links = [link for link in raw_bitstream_links if any(name in link for name in raw_bitstream_names)]
 
         with urllib.request.urlopen(self.url_reference_vectors_checksums) as resp:
             data = str(resp.read())
-            hparser.feed(data)
-        raw_bitstream_md5_links = [url for url in hparser.links if url.endswith(tuple(MD5_EXTS))]
-
+            hparser_raw_checksums.feed(data)
+        raw_bitstream_md5_links = [url for url in hparser_raw_checksums.links if url.endswith(tuple(MD5_EXTS))]
         raw_bitstream_md5_names = [
-            os.path.splitext(os.path.splitext(os.path.basename(x))[0].split('_f')[0])[0] for x in
-            raw_bitstream_md5_links
+            os.path.splitext(os.path.splitext(os.path.basename(x))[0].split('_f')[0])[0] for x in raw_bitstream_md5_links
         ]
 
         missing_checksum_files = [x for x in set(compressed_bitstream_names).difference(raw_bitstream_md5_names)]
         if missing_checksum_files:
-            print(f"Missing reference checksum files: {missing_checksum_files}")
             for missing_checksum in missing_checksum_files:
                 print(f"Skipping checksum for {missing_checksum}, as the reference file is missing.")
 
@@ -222,24 +322,6 @@ def generate(self, download, jobs):
             link for link in raw_bitstream_md5_links if any(name in link for name in raw_bitstream_md5_names)
         ]
 
-        for source_url in compressed_bitstream_links:
-            input_filename = os.path.basename(source_url)
-            test_vector_name = os.path.splitext(input_filename)[0]
-            test_vector = TestVector(
-                test_vector_name, source_url, "__skip__", input_filename, OutputFormat.UNKNOWN, ""
-            )
-            test_suite.test_vectors[test_vector_name] = test_vector
-
-        print(f"Download list of compressed bitstreams from {self.url_test_vectors}")
-        if download:
-            test_suite.download(
-                jobs=jobs,
-                out_dir=test_suite.resources_dir,
-                verify=False,
-                extract_all=True,
-                keep_file=True,
-            )
-
         # Download test suite output reference and md5 checksum files
         self._download_raw_output_references_and_checksums(jobs, test_suite, raw_bitstream_links,
                                                            raw_bitstream_md5_links)
@@ -381,7 +463,19 @@ def _fill_checksum_aac(test_vector, dest_dir):
         Codec.AAC,
         "ISO IEC 14496-26 MPEG4 AAC ADIF test suite",
         URL_MPEG4_ADIF,
-        URL_MPEG4_WAV_REFS,
+        URL_MPEG4_WAV_REFS_DVD2,
+        URL_MPEG4_WAV_REFS_MD5,
+        False,
+    )
+    generator.generate(not args.skip_download, args.jobs)
+
+    generator = AACGenerator(
+        "MPEG4_AAC-MP4",
+        "MPEG4_AAC-MP4",
+        Codec.AAC,
+        "ISO IEC 14496-26 MPEG4 AAC MP4 test suite",
+        URL_MPEG4_MP4,
+        URL_MPEG4_WAV_REFS_DVD2,
         URL_MPEG4_WAV_REFS_MD5,
         False,
     )