Skip to content

Commit

Permalink
Clean up latest code refactoring, update tests #970
Browse files Browse the repository at this point in the history
Reference: #970

Signed-off-by: John M. Horan <johnmhoran@gmail.com>
  • Loading branch information
johnmhoran committed Jan 4, 2023
1 parent ed686b5 commit 0295783
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 302 deletions.
301 changes: 2 additions & 299 deletions vulnerabilities/importers/apache_tomcat.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,8 @@ def advisory_data(self):
"""
advisories = []

for advisory_page in self.fetch_advisory_pages(self.security_updates_home):
# for advisory_page in self.fetch_advisory_pages(self.security_updates_home):
for advisory_page in self.fetch_advisory_pages():
advisories.extend(self.extract_advisories_from_page(advisory_page))

if TRACE:
Expand Down Expand Up @@ -179,83 +180,6 @@ def extract_advisories_from_page(self, apache_tomcat_advisory_html):

return advisories

# XXX: 2023-01-02 Monday 14:48:57. We need to convert this from an instance method to a
# module-level function because it's now called by another module-level function, which has no `self`.
# def to_version_ranges(self, versions_data, fixed_versions):
# constraints = []

# for version_item in versions_data:
# if "to" in version_item:
# version_item_split = version_item.split(" ")

# constraints.append(
# VersionConstraint(
# comparator=">=",
# version=MavenVersion(version_item_split[0]),
# )
# )
# constraints.append(
# VersionConstraint(
# comparator="<=",
# version=MavenVersion(version_item_split[-1]),
# )
# )

# elif "-" in version_item:
# version_item_split = version_item.split("-")

# constraints.append(
# VersionConstraint(
# comparator=">=",
# version=MavenVersion(version_item_split[0]),
# )
# )
# constraints.append(
# VersionConstraint(
# comparator="<=",
# version=MavenVersion(version_item_split[-1]),
# )
# )

# else:
# version_item_split = version_item.split(" ")

# constraints.append(
# VersionConstraint(
# comparator="=",
# version=MavenVersion(version_item_split[0]),
# )
# )

# for fixed_item in fixed_versions:
# if "-" in fixed_item and not any([i.isalpha() for i in fixed_item]):
# fixed_item_split = fixed_item.split(" ")

# constraints.append(
# VersionConstraint(
# comparator=">=",
# version=MavenVersion(fixed_item_split[0]),
# ).invert()
# )
# constraints.append(
# VersionConstraint(
# comparator="<=",
# version=MavenVersion(fixed_item_split[-1]),
# ).invert()
# )

# else:
# fixed_item_split = fixed_item.split(" ")

# constraints.append(
# VersionConstraint(
# comparator="=",
# version=MavenVersion(fixed_item_split[0]),
# ).invert()
# )

# return MavenVersionRange(constraints=constraints)


@dataclasses.dataclass(order=True)
class TomcatAdvisoryData:
Expand Down Expand Up @@ -563,224 +487,3 @@ def to_version_ranges(versions_data, fixed_versions):
)

return MavenVersionRange(constraints=constraints)

# =====================================================

# for para in para_list:
# print("type(para) = {}".format(type(para)))
# cve_p = para.find(
# "strong",
# # text=lambda text: text and text.startswith(tuple(vuln_p_list)),
# text=lambda text: text and text.startswith(severities),
# )
# # 2022-12-31 Saturday 20:16:51. Seems this is False for the missing pair of CVEs?
# if cve_p:
# cve_p_parent = cve_p.find_parent()
# print("\ncve_p = {}\n".format(cve_p))
# print("\ncve_p_parent = {}\n".format(cve_p_parent))

# # TODO: 2022-12-31 Saturday 18:58:00. Why does this omit the 1st pair, CVE-2020-9484 and CVE-2021-25329?
# # Seems cve_p_parent (just above) is also omitted for that data?
# cve_url_list = cve_p_parent.find_all("a")
# print("cve_url_list = {}\n".format(cve_url_list))

# cve_id_list = [cve_text.text for cve_text in cve_url_list]
# print("cve_id_list = {}\n".format(cve_id_list))

# for cve_id in cve_id_list:

# test_dict = {
# "aliases": [cve_id],
# "fixed_version": fixed_version,
# "affected_versions": [],
# }

# yield test_dict

# else:
# print("\n\nALERT! This para failed the `if_cve_p` test: {} \n\n".format(para))


# ========================================================
# severity_score = para.text.split(" ")[0]
# severity_score = severity_score.split(":")[0]
# print("\nseverity_score = {}\n".format(severity_score))

# print("===")

# better_cve_url_list = para.find_all("a")
# print("better_cve_url_list = {}\n".format(better_cve_url_list))

# better_cve_id_list = [cve_text.text for cve_text in better_cve_url_list]
# print("better_cve_id_list = {}\n".format(better_cve_id_list))

# for better_cve_url in para.find_all("a"):
# print("better_cve_url = {}\n".format(better_cve_url))
# print("better_cve_url.text = {}".format(better_cve_url.text))

# better_nextSiblings = para.find_next_siblings()

# print("===")

# section_parent = para.find_parent()

# cve_url_list = section_parent.find_all("a")
# print("cve_url_list = {}\n".format(cve_url_list))

# cve_id_list = [cve_text.text for cve_text in cve_url_list]
# print("cve_id_list = {}\n".format(cve_id_list))

# for cve_url in section_parent.find_all("a"):
# print("cve_url = {}\n".format(cve_url))
# print("cve_url.text = {}".format(cve_url.text))

# nextSiblings = section_parent.find_next_siblings()

# print("===")

# fixed_commit_list = []
# affected_versions = []

# for sib in better_nextSiblings:
# if "was fixed in" in sib.text or "was fixed with" in sib.text:
# print("\nnext sib (was fixed) = {}".format(sib))
# fixed_commit_list = sib.find_all("a")
# print("\nfixed_commit_list = {}".format(fixed_commit_list))

# elif "Affects" in sib.text:
# print("\nnext sib (affects) = {}\n".format(sib))

# # 2022-12-27 Tuesday 18:47:28. We need the list of `sib` elements to examine -- and test -- the raw HTML.
# self.record_of_all_affects_elements.append(sib)

# # 2022-12-27 Tuesday 14:47:51. We'll examine the affects_string and try to find and remove unwanted alpha and related characters/strings.
# # ===
# # This version is before stripping/replacing etc.
# affects_string = sib.text.split("Affects:")[-1].strip()
# print("affects_string = {}\n".format(affects_string))
# self.record_of_all_affects_strings.append(affects_string)

# affected_versions = affects_string.split(", ")
# print("> affected_versions = {}\n".format(affected_versions))
# self.record_of_all_affected_version_strings.append(affected_versions)
# # ===
# # This version is with most but not all of the stripping/replacing.
# # affects_string = sib.text.split("Affects:")[-1].strip()
# # affects_string = affects_string.replace("\n", "")
# # affects_string = " ".join(affects_string.split())
# # affects_string_no_parens = re.sub(r" ?\([^)]+\)", "", affects_string)
# # # print("affects_string = {}\n".format(affects_string))
# # # self.record_of_all_affects_strings.append(affects_string)
# # print(
# # "affects_string_no_parens = {}\n".format(affects_string_no_parens)
# # )
# # self.record_of_all_affects_strings.append(affects_string_no_parens)

# # # affected_versions = affects_string.split(", ")
# # affected_versions = affects_string_no_parens.split(", ")
# # print("> affected_versions = {}\n".format(affected_versions))
# # self.record_of_all_affected_version_strings.append(affected_versions)
# # ===

# elif sib.find_all(
# "strong",
# text=lambda text: text and text.startswith(tuple(severities)),
# ):
# break

# # Starting to flesh out this new approach.
# # for cve_id_record in cve_id_list:
# # test_advisory_dict["fixed_versions"] = fixed_versions
# # test_advisory_dict["aliases"] = [cve_id_record]

# temp_dict_list = []

# print("\n1. affected_versions = {}\n".format(affected_versions))

# for better_cve_id_record in better_cve_id_list:
# # 2022-12-26 Monday 14:41:11. This is where `test_advisory_dict = {}` belongs! Now we have data for the double-CVEs!
# test_advisory_dict = {}
# test_advisory_dict["fixed_versions"] = fixed_versions
# test_advisory_dict["aliases"] = [better_cve_id_record]

# self.updated_temp_advisory_dict_list.append(test_advisory_dict)

# print(
# "==========================> better_cve_id_record = {}".format(
# better_cve_id_record
# )
# )

# temp_dict_list.append(test_advisory_dict)

# # TODO: 2022-12-26 Monday 14:55:49. Is this where we build the "better" AdvisoryData() objects?
# better_severities = []
# better_severities.append(
# VulnerabilitySeverity(
# system=APACHE_TOMCAT,
# value=severity_score,
# scoring_elements="",
# )
# )

# print("\n2. affected_versions = {}\n".format(affected_versions))

# affected_version_range = self.to_version_ranges(
# # versions_data, fixed_versions
# affected_versions,
# # TODO: 2022-12-26 Monday 16:01:08. fix this!
# # ["1.1"],
# # ["8.5.0 to 8.5.4", " 8.0.0.RC1 to 8.0.36"],
# # ["3.0", " 3.1-3.1.1", " 3.2-3.2.1"],
# # TODO: 2022-12-26 Monday 17:56:04. This identified the problem -- a space at the start of the 2nd range!
# # ["8.5.0 to 8.5.4", "8.0.0.RC1 to 8.0.36"],
# fixed_versions,
# )
# references = [
# Reference(
# # url=f"https://cve.mitre.org/cgi-bin/cvename.cgi?name={cve_id}",
# url=f"https://cve.mitre.org/cgi-bin/cvename.cgi?name={better_cve_id_record}",
# # reference_id=cve_id,
# reference_id=better_cve_id_record,
# # severities=severities,
# severities=better_severities,
# ),
# ]

# # for commit_url in fixed_in_commits:
# for commit_url in fixed_commit_list:
# # references.append(Reference(url=commit_url))
# # TODO: 2022-12-26 Monday 17:23:38. Does this fix the error TypeError: Object of type Tag is not JSON serializable? Yes.
# references.append(Reference(url=commit_url["href"]))

# # 2022-12-26 Monday 15:37:02. Does this belong here?
# affected_packages = []

# affected_packages.append(
# AffectedPackage(
# package=PackageURL(
# type="maven",
# namespace="apache",
# name="tomcat",
# ),
# affected_version_range=affected_version_range,
# )
# )

# advisories.append(
# AdvisoryData(
# aliases=[better_cve_id_record],
# summary="",
# affected_packages=affected_packages,
# references=references,
# )
# )

# self.temp_list_of_fixed_versions.append(fixed_versions)

# yield AdvisoryData(
# aliases=aliases,
# summary="",
# affected_packages=affected_packages,
# references=references,
# )
14 changes: 11 additions & 3 deletions vulnerabilities/tests/test_apache_tomcat.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
TEST_DATA = os.path.join(BASE_DIR, "test_data/apache_tomcat")

security_updates_home = "https://tomcat.apache.org/security"
# security_updates_home = "https://tomcat.apache.org/security"


# Temporary test to flesh out the `extract_advisories_from_page()` method.
Expand Down Expand Up @@ -311,9 +311,17 @@ def test_advisory_data():


def test_fetch_links():
retrieved_links = ApacheTomcatImporter().fetch_advisory_links(security_updates_home)
# retrieved_links = ApacheTomcatImporter().fetch_advisory_links(security_updates_home)
retrieved_links = ApacheTomcatImporter().fetch_advisory_links(
"https://tomcat.apache.org/security"
)

generator_result = []
for link in retrieved_links:
generator_result.append(link)

assert retrieved_links == [
assert generator_result == [
"https://tomcat.apache.org/security-11.html",
"https://tomcat.apache.org/security-10.html",
"https://tomcat.apache.org/security-9.html",
"https://tomcat.apache.org/security-8.html",
Expand Down

0 comments on commit 0295783

Please sign in to comment.