Skip to content

Commit

Permalink
COM-12568: Extend lint and formatting to scripts folder
Browse files Browse the repository at this point in the history
- added folder "scripts" to Makefile path for checks
- refactored .py files under scripts folder
  • Loading branch information
mdimopoulos committed Dec 4, 2024
1 parent ecfa8c8 commit 340cf86
Show file tree
Hide file tree
Showing 10 changed files with 259 additions and 345 deletions.
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
PY_FILES=fluster
PY_FILES=fluster scripts
CONTRIB_DIR=contrib
DECODERS_DIR=decoders
PYTHONPATH=.
Expand Down Expand Up @@ -105,7 +105,7 @@ mpeg_2_aac_reference_decoder: ## build ISO MPEG2 AAC reference decoder
if ! dpkg -l | grep g++-multilib -c >>/dev/null; then sudo apt-get install g++-multilib; fi
if [ ! $(wildcard /usr/include/asm) ] && [ $(wildcard /usr/include/asm-generic) ]; then sudo ln -s /usr/include/asm-generic /usr/include/asm; fi

ifeq ($(wildcard $(CONTRIB_DIR)/C039486_Electronic_inserts), )
ifeq ($(wildcard $(CONTRIB_DIR)/C039486_Electronic_inserts),)
$(create_dirs)
cd $(CONTRIB_DIR) && rm -f iso_cookies.txt
cd $(CONTRIB_DIR) && wget -qO- --keep-session-cookies --save-cookies iso_cookies.txt \
Expand Down
6 changes: 1 addition & 5 deletions fluster/decoders/ffmpeg.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,11 +152,7 @@ def check(self, verbose: bool) -> bool:
# Get ffmpeg version
output = _run_ffmpeg_command(self.binary, "-version", verbose=verbose)
version = re.search(r" version n?(\d+)\.(\d+)(?:\.(\d+))?", output)
self.ffmpeg_version = (
tuple(map(lambda x: int(x) if x else 0, version.groups()))
if version
else None
)
self.ffmpeg_version = tuple((int(x) if x else 0 for x in version.groups())) if version else None

# Check if codec can be used
output = _run_ffmpeg_command(self.binary, "-codecs", verbose=verbose)
Expand Down
4 changes: 1 addition & 3 deletions fluster/fluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,9 +172,7 @@ def _load_test_suites(self) -> None:
if len(self.test_suites) == 0:
raise Exception(f'No test suites found in "{self.test_suites_dir}"')

def list_decoders(
self, check: bool, verbose: bool, codec: Optional[Codec] = None
) -> None:
def list_decoders(self, check: bool, verbose: bool, codec: Optional[Codec] = None) -> None:
"""List all the available decoders"""
print("\nList of available decoders:")
decoders_dict: Dict[Codec, List[Decoder]] = {}
Expand Down
1 change: 0 additions & 1 deletion fluster/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
from tempfile import gettempdir
from typing import Any, Tuple


from fluster import utils
from fluster.codec import Codec
from fluster.fluster import Context, Fluster, SummaryFormat
Expand Down
84 changes: 41 additions & 43 deletions scripts/gen_aac.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,21 +18,20 @@
# License along with this library. If not, see <https://www.gnu.org/licenses/>.

import argparse
import re
from html.parser import HTMLParser
from multiprocessing import Pool
import multiprocessing
import os
import re
import sys
import urllib.request
import multiprocessing
from html.parser import HTMLParser
from multiprocessing import Pool
from typing import Any, List, Optional, Tuple

# pylint: disable=wrong-import-position
sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
from fluster import utils
from fluster.codec import Codec, OutputFormat
from fluster.test_suite import TestSuite, TestVector

# pylint: enable=wrong-import-position
from fluster.test_suite import TestSuite
from fluster.test_vector import TestVector

BASE_URL = "https://standards.iso.org/"

Expand All @@ -49,29 +48,29 @@

BITSTREAM_EXTS = [".adts", ".adif"]
MD5_EXTS = [".wav.md5sum"]
MD5_EXCLUDES = []
MD5_EXCLUDES: List[str] = []
RAW_EXTS = [".wav"]


class HREFParser(HTMLParser):
    """HTML parser that collects the targets of anchor (``<a href=...>``) tags.

    Every collected entry is ``BASE_URL`` with one trailing slash removed (if
    it has one) followed by the raw ``href`` value. Entries are appended to
    ``self.links`` in the order the tags are encountered.
    """

    def __init__(self) -> None:
        # Accumulator for the links found while feeding HTML to the parser.
        self.links: List[str] = []
        super().__init__()

    def error(self, message: str) -> None:
        """Print the parser error message."""
        print(message)

    def handle_starttag(self, tag: str, attrs: List[Tuple[str, Optional[str]]]) -> None:
        """Record the href of an anchor tag; all other tags are ignored."""
        if tag != "a":
            return
        for name, value in attrs:
            # HTMLParser reports a valueless attribute (`<a href>`) as None.
            # Skip it rather than appending a bogus "...None" link.
            if name != "href" or value is None:
                continue
            # Drop a single trailing slash from the base so that joining it
            # with the href does not double the separator; endswith() also
            # keeps an empty BASE_URL from raising IndexError.
            base_url = BASE_URL[:-1] if BASE_URL.endswith("/") else BASE_URL
            self.links.append(base_url + value)


class AACGenerator:
Expand All @@ -97,11 +96,13 @@ def __init__(
self.url_reference_vectors_checksums = url_reference_vectors_checksums
self.use_ffprobe = use_ffprobe

def _download_raw_output_references_and_checksums(self, jobs, test_suite, raw_bitstream_links,
raw_bitstream_md5_links):
def _download_raw_output_references_and_checksums(
self, jobs: int, test_suite: TestSuite, raw_bitstream_links: List[str], raw_bitstream_md5_links: List[str]
) -> None:
"""Downloads raw output reference bitstreams and their checksums"""
with Pool(jobs) as pool:
def _callback_error(err):

def _callback_error(err: Any) -> None:
print(f"\nError downloading -> {err}\n")
pool.terminate()

Expand All @@ -111,8 +112,8 @@ def _callback_error(err):

for link in raw_bitstream_links:
file_name = os.path.basename(link)
base_name = file_name.split('.')[0]
main_prefix = "_".join(base_name.split('_')[:2])
base_name = file_name.split(".")[0]
main_prefix = "_".join(base_name.split("_")[:2])

directory = os.path.join(test_suite.resources_dir, test_suite.name, main_prefix)
if not os.path.exists(directory):
Expand All @@ -130,8 +131,8 @@ def _callback_error(err):

for link in raw_bitstream_md5_links:
file_name = os.path.basename(link)
base_name = file_name.split('.')[0]
main_prefix = "_".join(base_name.split('_')[:2])
base_name = file_name.split(".")[0]
main_prefix = "_".join(base_name.split("_")[:2])

directory = os.path.join(test_suite.resources_dir, test_suite.name, main_prefix)
if not os.path.exists(directory):
Expand All @@ -156,7 +157,7 @@ def _callback_error(err):
except Exception as e:
sys.exit(f"Some download failed: {e}")

def generate(self, download, jobs):
def generate(self, download: bool, jobs: int) -> None:
"""Generates the test suite and saves it to a file"""
output_filepath = os.path.join(self.suite_name + ".json")
test_suite = TestSuite(
Expand All @@ -165,7 +166,7 @@ def generate(self, download, jobs):
self.suite_name,
self.codec,
self.description,
dict(),
{},
)

hparser = HREFParser()
Expand All @@ -181,11 +182,9 @@ def generate(self, download, jobs):
hparser.feed(data)
raw_bitstream_links = [url for url in hparser.links if url.endswith(tuple(RAW_EXTS))]

raw_bitstream_names = [
os.path.splitext(os.path.basename(x))[0].split('_f')[0] for x in raw_bitstream_links
]
raw_bitstream_names = [os.path.splitext(os.path.basename(x))[0].split("_f")[0] for x in raw_bitstream_links]

missing_files = [x for x in set(compressed_bitstream_names).difference(raw_bitstream_names)]
missing_files = list(set(compressed_bitstream_names).difference(raw_bitstream_names))
if missing_files:
print(f"Missing reference files: {missing_files}")
for missing_file in missing_files:
Expand All @@ -204,11 +203,11 @@ def generate(self, download, jobs):
raw_bitstream_md5_links = [url for url in hparser.links if url.endswith(tuple(MD5_EXTS))]

raw_bitstream_md5_names = [
os.path.splitext(os.path.splitext(os.path.basename(x))[0].split('_f')[0])[0] for x in
raw_bitstream_md5_links
os.path.splitext(os.path.splitext(os.path.basename(x))[0].split("_f")[0])[0]
for x in raw_bitstream_md5_links
]

missing_checksum_files = [x for x in set(compressed_bitstream_names).difference(raw_bitstream_md5_names)]
missing_checksum_files = list(set(compressed_bitstream_names).difference(raw_bitstream_md5_names))
if missing_checksum_files:
print(f"Missing reference checksum files: {missing_checksum_files}")
for missing_checksum in missing_checksum_files:
Expand All @@ -225,9 +224,7 @@ def generate(self, download, jobs):
for source_url in compressed_bitstream_links:
input_filename = os.path.basename(source_url)
test_vector_name = os.path.splitext(input_filename)[0]
test_vector = TestVector(
test_vector_name, source_url, "__skip__", input_filename, OutputFormat.UNKNOWN, ""
)
test_vector = TestVector(test_vector_name, source_url, "__skip__", input_filename, OutputFormat.UNKNOWN, "")
test_suite.test_vectors[test_vector_name] = test_vector

print(f"Download list of compressed bitstreams from {self.url_test_vectors}")
Expand All @@ -241,8 +238,9 @@ def generate(self, download, jobs):
)

# Download test suite output reference and md5 checksum files
self._download_raw_output_references_and_checksums(jobs, test_suite, raw_bitstream_links,
raw_bitstream_md5_links)
self._download_raw_output_references_and_checksums(
jobs, test_suite, raw_bitstream_links, raw_bitstream_md5_links
)

for test_vector in test_suite.test_vectors.values():
dest_dir = os.path.join(test_suite.resources_dir, test_suite.name, test_vector.name)
Expand Down Expand Up @@ -286,7 +284,7 @@ def generate(self, download, jobs):
print("Generate new test suite: " + test_suite.name + ".json")

@staticmethod
def _fill_checksum_aac(test_vector, dest_dir):
def _fill_checksum_aac(test_vector: TestVector, dest_dir: str) -> None:
base_name = test_vector.name
raw_file = None
ext = None
Expand All @@ -299,23 +297,24 @@ def _fill_checksum_aac(test_vector, dest_dir):

if not raw_file:
for ext in RAW_EXTS:
fallback_file = os.path.join(dest_dir, base_name + '_f00' + ext)
fallback_file = os.path.join(dest_dir, base_name + "_f00" + ext)
if os.path.exists(fallback_file):
raw_file = fallback_file
break

if not raw_file:
raise Exception(
f"Neither {base_name + ext} nor {base_name + '_f00' + ext} found with extensions {RAW_EXTS} in {dest_dir}"
f"Neither {base_name + ext} nor {base_name + '_f00' + ext} found with extensions {RAW_EXTS} "
f"in {dest_dir}"
)

checksum_file = utils.find_by_ext(dest_dir, MD5_EXTS)
if checksum_file is None:
raise Exception("MD5 not found")

with open(checksum_file, "r") as checksum_file:
regex = re.compile(rf"([a-fA-F0-9]{{32,}}).*(?:\.(wav))?")
lines = checksum_file.readlines()
with open(checksum_file, "r") as checksum_fh:
regex = re.compile(r"([a-fA-F0-9]{32,}).*(?:\.(wav))?")
lines = checksum_fh.readlines()
# Filter out empty lines
filtered_lines = [line.strip() for line in lines if line.strip()]
# Prefer lines matching the regex pattern
Expand All @@ -327,8 +326,7 @@ def _fill_checksum_aac(test_vector, dest_dir):
test_vector.result = match.group(1).lower()
# Assert that we have extracted a valid MD5 from the file
assert (
len(test_vector.result) == 32
and re.search(r"^[a-fA-F0-9]{32}$", test_vector.result) is not None
len(test_vector.result) == 32 and re.search(r"^[a-fA-F0-9]{32}$", test_vector.result) is not None
), f"{test_vector.result} is not a valid MD5 hash"

test_vector.result = utils.file_checksum(raw_file)
Expand Down
Loading

0 comments on commit 340cf86

Please sign in to comment.