Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: heuristic splitting on '-' for lookups #3839

Merged
merged 9 commits into from
Apr 3, 2024
65 changes: 62 additions & 3 deletions cve_bin_tool/sbom_manager/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@


class SBOMManager:
"""
SBOMManager is a class that manages the Software Bill of Materials (SBOM) data.
It provides methods for scanning SBOM files, parsing them, and retrieving vendor information.
"""

SBOMtype = ["spdx", "cyclonedx", "swid"]

sbom_data: defaultdict[ProductInfo, TriageData]
Expand Down Expand Up @@ -72,9 +77,63 @@ def scan_file(self) -> dict[ProductInfo, TriageData]:
if version != "":
# Now add vendor to create product record....
vendor_set = self.get_vendor(product)
for vendor in vendor_set:
# if vendor is not None:
parsed_data.append(ProductInfo(vendor, product, version))
if (
len(vendor_set) == 1
and vendor_set[0] == "UNKNOWN"
and "-" in product
):
# If the product name contains '-', first try stripping common prefixes; if that fails, fall back to splitting it.
found_common_prefix = False
common_prefix = (
"perl-",
"golang-",
"rubygem-",
"python-",
"py3-",
"python3-",
"python2-",
"rust-",
"nodejs-",
)
for prefix in common_prefix:
if product.startswith(prefix):
common_prefix_product = product[len(prefix) :]
common_prefix_vendor = self.get_vendor(
common_prefix_product
)
if len(common_prefix_vendor) > 1 or (
len(common_prefix_vendor) == 1
and common_prefix_vendor[0] != "UNKNOWN"
):
found_common_prefix = True
for vendor in common_prefix_vendor:
parsed_data.append(
ProductInfo(
vendor, common_prefix_product, version
)
)
break
if not found_common_prefix:
# If no vendor is found after removing a common prefix, try splitting the product name on '-'.
LOGGER.warning(
f"No Vendor found for {product}, trying splitted product. "
"Some results may be inaccurate due to vendor identification limitations."
)
splitted_product = product.split("-")
for sp in splitted_product:
temp = self.get_vendor(sp)
if len(temp) > 1 or (
len(temp) == 1 and temp[0] != "UNKNOWN"
):
for vendor in temp:
# if vendor is not None:
parsed_data.append(
ProductInfo(vendor, sp, version)
)
else:
for vendor in vendor_set:
# if vendor is not None:
parsed_data.append(ProductInfo(vendor, product, version))

for row in parsed_data:
self.sbom_data[row]["default"] = {
Expand Down
Loading