From 676229916e6520bd1c8394badc97715bbe3109a8 Mon Sep 17 00:00:00 2001 From: jakub-nt <175944085+jakub-nt@users.noreply.github.com> Date: Tue, 22 Oct 2024 19:31:01 +0200 Subject: [PATCH] Remove extra dependencies; clean up code Signed-off-by: jakub-nt <175944085+jakub-nt@users.noreply.github.com> --- masterfiles/download_all_versions.py | 113 ++++---------------- masterfiles/generate_release_information.py | 13 ++- 2 files changed, 28 insertions(+), 98 deletions(-) diff --git a/masterfiles/download_all_versions.py b/masterfiles/download_all_versions.py index c1413e8..58b9f0c 100644 --- a/masterfiles/download_all_versions.py +++ b/masterfiles/download_all_versions.py @@ -1,104 +1,35 @@ from pathlib import Path -from requests_cache import CachedSession -from shutil import unpack_archive -from urllib.request import urlretrieve +import shutil +import urllib.request + +from cfbs.utils import get_json DOWNLOAD = True -DEBUG = False ENTERPRISE_URL = "https://cfengine.com/release-data/enterprise/releases.json" COMMUNITY_URL = "https://cfengine.com/release-data/community/releases.json" - -def print_debug(*args, **kwargs): - if DEBUG: - print(*args, **kwargs) - - -def check_url_downloadable(session, url): - headers = session.head(url).headers - downloadable = "attachment" in headers.get("Content-Disposition", "") - - content_type = headers.get("content-type") - if "xml" in content_type.lower(): - downloadable = False - elif "gzip" in content_type.lower(): - downloadable = True - - return downloadable - - -def check_analogous_urls(session, version): - url_tarballs = ( - "https://cfengine-package-repos.s3.amazonaws.com/tarballs/cfengine-masterfiles-" - + version - + ".pkg.tar.gz" - ) - - url_downloadable = check_url_downloadable(session, url_tarballs) - print_debug("Checking tarballs URL: ", url_downloadable) - print_debug(url_tarballs) - if url_downloadable: - return url_tarballs - - url_enterprise = ( - "https://cfengine-package-repos.s3.amazonaws.com/enterprise/Enterprise-" - + version - + "/misc/cfengine-masterfiles-" - + version - ) - - url_enterprise_0 = url_enterprise + ".pkg.tar.gz" - url_enterprise_1 = url_enterprise + "-1.pkg.tar.gz" - url_enterprise_2 = url_enterprise + "-2.pkg.tar.gz" - url_enterprise_3 = url_enterprise + "-3.pkg.tar.gz" - - print_debug( - "Checking enterprise-0 URL: ", check_url_downloadable(session, url_enterprise_0) - ) - print_debug( - "Checking enterprise-1 URL: ", check_url_downloadable(session, url_enterprise_1) - ) - print_debug( - "Checking enterprise-2 URL: ", check_url_downloadable(session, url_enterprise_2) - ) - print_debug( - "Checking enterprise-3 URL: ", check_url_downloadable(session, url_enterprise_3) - ) - - return None - - # TODO # def download_all_versions_community(): -# response = session.get(COMMUNITY_URL) +# data = get_json(COMMUNITY_URL) # # "masterfiles is at a different index" in 3.10.1 happens only for Enterprise, not Community def download_all_versions_enterprise(): - session = CachedSession() - response = session.get(ENTERPRISE_URL) - data = response.json() + data = get_json(ENTERPRISE_URL) urls_dict = {} reported_checksums = {} - for dd in data["releases"]: - version = dd["version"] - print_debug(version) - release_url = dd["URL"] - print_debug(release_url) + for releases_data in data["releases"]: + version = releases_data["version"] + release_url = releases_data["URL"] - subresponse = session.get(release_url) - subdata = subresponse.json() + subdata = get_json(release_url) - subdd = subdata["artifacts"] - if "Additional Assets" not in subdd: - print_debug("Warning: no Additional Assets!") + artifacts_data = subdata["artifacts"] + if "Additional Assets" not in artifacts_data: # happens for 3.9.0b1, 3.8.0b1, 3.6.1, 3.6.0 - if DEBUG: - check_analogous_urls(session, version) - download_url = None else: @@ -109,27 +40,23 @@ def download_all_versions_enterprise(): else: # there's precisely one version (3.10.1) for which masterfiles is at a different index if version == "3.10.1": - subdd = subdd["Additional Assets"][1] + artifacts_data = artifacts_data["Additional Assets"][1] else: - subdd = subdd["Additional Assets"][0] + artifacts_data = artifacts_data["Additional Assets"][0] - if subdd["Title"] != "Masterfiles ready-to-install tarball": - print_debug("Warning: not masterfiles!") + if artifacts_data["Title"] != "Masterfiles ready-to-install tarball": # happens for 3.10.1, 3.9.2, 3.9.0, 3.8.2, 3.8.1, 3.8.0, 3.6.2--3.7.4 - if DEBUG: - check_analogous_urls(session, version) # 3.10.1: see above # 3.9.2: no masterfiles listed, but an analogous hidden URL exists - # 3.9.0 and others: no masterfiles listed, and an analogous hidden URLs seemingly do not exist + # 3.9.0 and others: no masterfiles listed, and analogous hidden URLs seemingly do not exist if version == "3.9.2": download_url = "https://cfengine-package-repos.s3.amazonaws.com/tarballs/cfengine-masterfiles-3.9.2.pkg.tar.gz" else: download_url = None else: - download_url = subdd["URL"] - reported_checksums[version] = subdd["SHA256"] + download_url = artifacts_data["URL"] + reported_checksums[version] = artifacts_data["SHA256"] - print_debug(download_url) if download_url is not None: urls_dict[version] = download_url @@ -151,9 +78,9 @@ def download_all_versions_enterprise(): filename = url.split("/")[-1] tarball_path = version_path / filename - urlretrieve(url, tarball_path) + urllib.request.urlretrieve(url, tarball_path) - unpack_archive(tarball_path, version_path / "tarball") + shutil.unpack_archive(tarball_path, version_path / "tarball") # for local verification of the reported (Enterprise) (.pkg.tar.gz) checksums return downloaded_versions, reported_checksums diff --git a/masterfiles/generate_release_information.py b/masterfiles/generate_release_information.py index 9df5f7e..cdfa4b4 100644 --- a/masterfiles/generate_release_information.py +++ b/masterfiles/generate_release_information.py @@ -1,5 +1,5 @@ # TODO document `cfbs generate-release-information` -# this command uses several extra deps compared to the rest of cfbs +# it generates the .json data files in the cwd import sys from pathlib import Path @@ -7,14 +7,16 @@ from masterfiles.check_tarball_checksums import check_tarball_checksums from masterfiles.generate_vcf_download import generate_vcf_download from masterfiles.generate_vcf_git_checkout import generate_vcf_git_checkout -from masterfiles.check_download_matches_git import check_download_matches_git + +# commented out for now as this adds an extra dependency in its current state (dictdiffer) +# from masterfiles.check_download_matches_git import check_download_matches_git ENTERPRISE_PATH = Path("./enterprise") def generate_release_information(): - # only needs to be done once (although changes could happen afterwards), and silly to do if already have access to hosted files downloaded_versions, reported_checksums = download_all_versions_enterprise() + # TODO Community coverage: # downloaded_versions, reported_checksums = download_all_versions_community() # Enterprise 3.9.2 is downloaded but there is no reported checksum, so both args are necessary @@ -29,5 +31,6 @@ def generate_release_information(): generate_vcf_download(ENTERPRISE_PATH, downloaded_versions) generate_vcf_git_checkout(downloaded_versions) - check_download_matches_git(downloaded_versions) - # TODO automatic analysis of the difference-*.txts + # TODO automatic analysis of the difference between downloadable MPF data and git MPF data + # in its current state, this generates differences-*.txt files for each version + # check_download_matches_git(downloaded_versions)