Skip to content

Commit

Permalink
Remove extra dependencies; clean up code
Browse files Browse the repository at this point in the history
Signed-off-by: jakub-nt <175944085+jakub-nt@users.noreply.github.com>
  • Loading branch information
jakub-nt committed Oct 22, 2024
1 parent f83468d commit 6762299
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 98 deletions.
113 changes: 20 additions & 93 deletions masterfiles/download_all_versions.py
Original file line number Diff line number Diff line change
@@ -1,104 +1,35 @@
from pathlib import Path
from requests_cache import CachedSession
from shutil import unpack_archive
from urllib.request import urlretrieve
import shutil
import urllib.request

from cfbs.utils import get_json

DOWNLOAD = True
DEBUG = False

ENTERPRISE_URL = "https://cfengine.com/release-data/enterprise/releases.json"
COMMUNITY_URL = "https://cfengine.com/release-data/community/releases.json"


def print_debug(*args, **kwargs):
    """Forward all arguments to print(), but only when DEBUG is truthy."""
    if not DEBUG:
        return
    print(*args, **kwargs)


def check_url_downloadable(session, url):
    """Return whether a HEAD request suggests that `url` serves a download.

    The verdict starts from the Content-Disposition header (it counts as
    downloadable when it mentions "attachment") and is then overridden by
    the content type: anything mentioning "xml" is not downloadable, and
    anything mentioning "gzip" is downloadable.

    Parameters:
        session: object exposing a requests-style `head(url)` method whose
            result has a `headers` mapping supporting `.get()`.
        url: the URL to probe.

    Returns:
        True if the response headers indicate a downloadable file,
        False otherwise.
    """
    headers = session.head(url).headers
    downloadable = "attachment" in headers.get("Content-Disposition", "")

    # The content-type header may be missing entirely; treat that as an
    # empty string rather than crashing on None.lower().
    content_type = (headers.get("content-type") or "").lower()
    if "xml" in content_type:
        downloadable = False
    elif "gzip" in content_type:
        downloadable = True

    return downloadable


def check_analogous_urls(session, version):
    """Probe conventional S3 locations for a masterfiles tarball of `version`.

    The generic tarballs URL is checked first and returned if it is
    downloadable. Otherwise four Enterprise URL variants (no suffix, -1, -2,
    -3) are probed only so that their status shows up in the debug output,
    and None is returned.
    """
    tarball_url = (
        "https://cfengine-package-repos.s3.amazonaws.com/tarballs/cfengine-masterfiles-"
        + version
        + ".pkg.tar.gz"
    )

    tarball_ok = check_url_downloadable(session, tarball_url)
    print_debug("Checking tarballs URL: ", tarball_ok)
    print_debug(tarball_url)
    if tarball_ok:
        return tarball_url

    enterprise_base = (
        "https://cfengine-package-repos.s3.amazonaws.com/enterprise/Enterprise-"
        + version
        + "/misc/cfengine-masterfiles-"
        + version
    )

    # (debug label, filename suffix) for every Enterprise variant to probe
    candidates = (
        ("Checking enterprise-0 URL: ", ".pkg.tar.gz"),
        ("Checking enterprise-1 URL: ", "-1.pkg.tar.gz"),
        ("Checking enterprise-2 URL: ", "-2.pkg.tar.gz"),
        ("Checking enterprise-3 URL: ", "-3.pkg.tar.gz"),
    )
    for label, suffix in candidates:
        print_debug(label, check_url_downloadable(session, enterprise_base + suffix))

    return None


# TODO
# def download_all_versions_community():
# response = session.get(COMMUNITY_URL)
# data = get_json(COMMUNITY_URL)
# # "masterfiles is at a different index" in 3.10.1 happens only for Enterprise, not Community


def download_all_versions_enterprise():
session = CachedSession()
response = session.get(ENTERPRISE_URL)
data = response.json()
data = get_json(ENTERPRISE_URL)

urls_dict = {}
reported_checksums = {}

for dd in data["releases"]:
version = dd["version"]
print_debug(version)
release_url = dd["URL"]
print_debug(release_url)
for releases_data in data["releases"]:
version = releases_data["version"]
release_url = releases_data["URL"]

subresponse = session.get(release_url)
subdata = subresponse.json()
subdata = get_json(release_url)

subdd = subdata["artifacts"]
if "Additional Assets" not in subdd:
print_debug("Warning: no Additional Assets!")
artifacts_data = subdata["artifacts"]
if "Additional Assets" not in artifacts_data:
# happens for 3.9.0b1, 3.8.0b1, 3.6.1, 3.6.0
if DEBUG:
check_analogous_urls(session, version)

download_url = None

else:
Expand All @@ -109,27 +40,23 @@ def download_all_versions_enterprise():
else:
# there's precisely one version (3.10.1) for which masterfiles is at a different index
if version == "3.10.1":
subdd = subdd["Additional Assets"][1]
artifacts_data = artifacts_data["Additional Assets"][1]
else:
subdd = subdd["Additional Assets"][0]
artifacts_data = artifacts_data["Additional Assets"][0]

if subdd["Title"] != "Masterfiles ready-to-install tarball":
print_debug("Warning: not masterfiles!")
if artifacts_data["Title"] != "Masterfiles ready-to-install tarball":
# happens for 3.10.1, 3.9.2, 3.9.0, 3.8.2, 3.8.1, 3.8.0, 3.6.2--3.7.4
if DEBUG:
check_analogous_urls(session, version)
# 3.10.1: see above
# 3.9.2: no masterfiles listed, but an analogous hidden URL exists
# 3.9.0 and others: no masterfiles listed, and an analogous hidden URLs seemingly do not exist
# 3.9.0 and others: no masterfiles listed, and analogous hidden URLs seemingly do not exist
if version == "3.9.2":
download_url = "https://cfengine-package-repos.s3.amazonaws.com/tarballs/cfengine-masterfiles-3.9.2.pkg.tar.gz"
else:
download_url = None
else:
download_url = subdd["URL"]
reported_checksums[version] = subdd["SHA256"]
download_url = artifacts_data["URL"]
reported_checksums[version] = artifacts_data["SHA256"]

print_debug(download_url)
if download_url is not None:
urls_dict[version] = download_url

Expand All @@ -151,9 +78,9 @@ def download_all_versions_enterprise():

filename = url.split("/")[-1]
tarball_path = version_path / filename
urlretrieve(url, tarball_path)
urllib.request.urlretrieve(url, tarball_path)

unpack_archive(tarball_path, version_path / "tarball")
shutil.unpack_archive(tarball_path, version_path / "tarball")

# for local verification of the reported (Enterprise) (.pkg.tar.gz) checksums
return downloaded_versions, reported_checksums
13 changes: 8 additions & 5 deletions masterfiles/generate_release_information.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,22 @@
# TODO document `cfbs generate-release-information`
# this command uses several extra deps compared to the rest of cfbs
# it generates the .json data files in the cwd
import sys
from pathlib import Path

from masterfiles.download_all_versions import download_all_versions_enterprise
from masterfiles.check_tarball_checksums import check_tarball_checksums
from masterfiles.generate_vcf_download import generate_vcf_download
from masterfiles.generate_vcf_git_checkout import generate_vcf_git_checkout
from masterfiles.check_download_matches_git import check_download_matches_git

# commented out for now as this adds an extra dependency in its current state (dictdiffer)
# from masterfiles.check_download_matches_git import check_download_matches_git

ENTERPRISE_PATH = Path("./enterprise")


def generate_release_information():
# downloading only needs to be done once (although upstream changes could happen afterwards), and is pointless if you already have access to the hosted files
downloaded_versions, reported_checksums = download_all_versions_enterprise()
# TODO Community coverage:
# downloaded_versions, reported_checksums = download_all_versions_community()

# Enterprise 3.9.2 is downloaded but there is no reported checksum, so both args are necessary
Expand All @@ -29,5 +31,6 @@ def generate_release_information():
generate_vcf_download(ENTERPRISE_PATH, downloaded_versions)
generate_vcf_git_checkout(downloaded_versions)

check_download_matches_git(downloaded_versions)
# TODO automatic analysis of the difference-*.txts
# TODO automatic analysis of the difference between downloadable MPF data and git MPF data
# in its current state, this generates differences-*.txt files for each version
# check_download_matches_git(downloaded_versions)

0 comments on commit 6762299

Please sign in to comment.