Skip to content

feat: Add ability to read CPE identifiers from CycloneDX triage data #3990

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Apr 8, 2024
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
217 changes: 184 additions & 33 deletions cve_bin_tool/sbom_manager/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,17 @@


class SBOMManager:
"""
Class: SBOMManager

This class is responsible for parsing various SBOM file formats (SPDX,
CycloneDX, SWID) in the CVE Bin Tool.

Attributes:
- sbom_data (DefaultDict[ProductInfo, TriageData]): Dictionary containing parsed SBOM data.

"""

SBOMtype = ["spdx", "cyclonedx", "swid"]

sbom_data: defaultdict[ProductInfo, TriageData]
Expand All @@ -45,6 +56,14 @@ def __init__(
self.cvedb = CVEDB(version_check=False)

def scan_file(self) -> dict[ProductInfo, TriageData]:
"""
Parses the SBOM input file and returns the product information and
corresponding triage data.

Returns:
- dict[ProductInfo, TriageData]: Parsed SBOM data.

"""
self.logger.debug(
f"Processing SBOM {self.filename} of type {self.type.upper()}"
)
Expand All @@ -65,16 +84,18 @@ def scan_file(self) -> dict[ProductInfo, TriageData]:

# Now process list of modules to create [vendor, product, version] tuples
parsed_data: list[ProductInfo] = []
for m in modules:
if m and m[0]:
# Using lower to normalize product names across databases
product, version = m[0].lower(), m[1]
if version != "":
# Now add vendor to create product record....
vendor_set = self.get_vendor(product)
for vendor in vendor_set:
# if vendor is not None:
parsed_data.append(ProductInfo(vendor, product, version))
for module_vendor, product, version in modules:
# Using lower to normalize product names across databases
product = product.lower()

if module_vendor is None:
# Now add vendor to create product record....
vendor_set = self.get_vendor(product)
for vendor in vendor_set:
# if vendor is not None:
parsed_data.append(ProductInfo(vendor, product, version))
else:
parsed_data.append(ProductInfo(module_vendor, product, version))

for row in parsed_data:
self.sbom_data[row]["default"] = {
Expand All @@ -88,9 +109,22 @@ def scan_file(self) -> dict[ProductInfo, TriageData]:
return self.sbom_data

def get_vendor(self, product: str) -> list:
"""
Get the list of vendors for the product name.

There may be more than one vendor for a given product name and all
matches are returned.

Args:
- product (str): Product name.

Returns:
- list: The list of vendors for the product

"""
vendorlist: list[str] = []
vendor_package_pair = self.cvedb.get_vendor_product_pairs(product)
if vendor_package_pair != []:
if vendor_package_pair:
# To handle multiple vendors, return all combinations of product/vendor mappings
for v in vendor_package_pair:
vendor = v["vendor"]
Expand All @@ -99,13 +133,34 @@ def get_vendor(self, product: str) -> list:
vendorlist.append("UNKNOWN")
return vendorlist

def is_valid_purl(self, purl_string):
"""Returns true if give purl_string is a valid purl string"""
purl_pattern = r"^\w+://[\w\-.]+/[\w\-.]+(?:/[\w\-.]+)*(?:\?[\w\-.]+=[\w\-.]+(?:&[\w\-.]+=[\w\-.]+)*)?$"
def is_valid_purl(self, purl_string: str) -> bool:
    """
    Validate that the PURL string has a form this tool can process.

    The accepted shape is ``pkg:type/namespace/name@version?qualifiers#subpath``:
    - the scheme must be the literal ``pkg`` (optionally followed by slashes);
    - the namespace segment is optional, so ``pkg:pypi/django@1.11.1`` is valid;
    - a version is required, as it was in the previous pattern;
    - qualifiers and subpath are optional.

    Args:
    - purl_string (str): Package URL string

    Returns:
    - bool: True if the purl_string parameter is a valid purl string, False otherwise.

    """
    purl_pattern = (
        r"^(?P<scheme>pkg):/*(?P<type>[^/]+)/"
        # Namespace may span several segments, or be missing entirely.
        r"(?:(?P<namespace>.+)/)?"
        r"(?P<name>[^/]+)@(?P<version>.+)"
        r"\??(?P<qualifiers>.*)#?(?P<subpath>.*)$"
    )
    return re.match(purl_pattern, purl_string) is not None

def parse_sbom(self):
"""parse SBOM, using PURL identifiers preferentially if found"""
def parse_sbom(self) -> [(str, str, str)]:
"""
Parse the SBOM to extract a list of modules, including vendor, product, and version information.

The parsed product information can be retrieved from different components of the SBOM, with the following order of preference:
1. CPE 2.3 Identifiers
2. CPE 2.2 Identifiers
3. Package URLs (purl)
4. Name and Version from the SBOM (Vendor will be unspecified)

Returns:
- List[(str, str, str)]: A list of tuples, each containing vendor, product, and version information for a module.

"""

# Set up SBOM parser
sbom_parser = SBOMParser(sbom_type=self.type)
Expand All @@ -123,28 +178,124 @@ def parse_sbom(self):
packages = [x for x in sbom_parser.get_sbom()["packages"].values()]
LOGGER.debug(f"Parsed SBOM {self.filename} {packages}")
for package in packages:
purl_found = False
# If PURL record found, use this data in preference to package data
vendor = None
package_name = None
version = None

# If Package URL or CPE record found, use this data in preference to package data
ext_ref = package.get("externalreference")
if ext_ref is not None:
for ref in ext_ref:
if ref[1] == "purl":
if self.is_valid_purl(ref[2]):
# Process purl identifier
purl_info = PackageURL.from_string(ref[2]).to_dict()
if purl_info["name"] and purl_info["version"]:
modules.append(
[purl_info["name"], purl_info["version"]]
)
purl_found = True
if not purl_found:
if package.get("version") is not None:
modules.append([package["name"], package["version"]])
else:
LOGGER.debug(f"No version found in {package}")
vendor, package_name, version = self.parse_ext_ref(ext_ref=ext_ref)

# For any data not found in CPE or the Package URL get from package data
if not vendor:
pass # Because no vendor was detected then all vendors with this named package
# will be included in the output.

if not package_name:
package_name = package["name"]

if (not version) and (package.get("version") is not None):
version = package["version"]
else:
LOGGER.debug(f"No version found in {package}")

if version:
# Found at least package and version, save the results
modules.append([vendor, package_name, version])

LOGGER.debug(f"Parsed SBOM {self.filename} {modules}")
return modules

def parse_ext_ref(self, ext_ref) -> (str | None, str | None, str | None):
"""
Parse external references in an SBOM to extract module information.

Two passes are made through the external references, giving priority to CPE types,
which will always match the CVE database.

Args:
- ext_ref (List[List[str]]): List of lists representing external references.
Each inner list contains [category, type, locator].

Returns:
- Optional[Tuple[str | None, str | None, str | None]]: A tuple containing the vendor, product, and version
information extracted from the external references, or None if not found.

"""
decoded = {}
for ref in ext_ref:
if ref[1] == "cpe23Type":
decoded["cpe23Type"] = self.decode_cpe23(ref[2])

elif ref[1] == "cpe22Type":
decoded["cpe22Type"] = self.decode_cpe22(ref[2])

elif ref[1] == "purl":
decoded["purl"] = self.decode_purl(ref[2])

# No ext-ref matches, return none
return decoded.get(
"cpe23Type",
decoded.get("cpe22Type", decoded.get("purl", (None, None, None))),
)

def decode_cpe22(self, cpe22) -> (str | None, str | None, str | None):
"""
Decode a CPE 2.2 formatted string to extract vendor, product, and version information.

Args:
- cpe22 (str): CPE 2.2 formatted string.

Returns:
- Tuple[str | None, str | None, str | None]: A tuple containing the vendor, product, and version
information extracted from the CPE 2.2 string, or None if the information is incomplete.

"""
cpe = cpe22.split(":")
vendor, product, version = cpe[2], cpe[3], cpe[4]
# Return available data, convert empty fields to None
return [vendor or None, product or None, version or None]

def decode_cpe23(self, cpe23) -> (str | None, str | None, str | None):
"""
Decode a CPE 2.3 formatted string to extract vendor, product, and version information.

Args:
- cpe23 (str): CPE 2.3 formatted string.

Returns:
- Tuple[str | None, str | None, str | None]: A tuple containing the vendor, product, and version
information extracted from the CPE 2.3 string, or None if the information is incomplete.

"""
cpe = cpe23.split(":")
vendor, product, version = cpe[3], cpe[4], cpe[5]
# Return available data, convert empty fields to None
return [vendor or None, product or None, version or None]

def decode_purl(self, purl) -> (str | None, str | None, str | None):
"""
Decode a Package URL (purl) to extract version information.

Args:
- purl (str): Package URL (purl) string.

Returns:
- Tuple[str | None, str | None, str | None]: A tuple containing the vendor (which is always None for purl),
product, and version information extracted from the purl string, or None if the purl is invalid or incomplete.

"""
vendor = None # Because the vendor and product identifiers in the purl don't always align
product = None # with the CVE DB, only the version is parsed.
version = None
if self.is_valid_purl(purl):
# Process purl identifier
purl_info = PackageURL.from_string(purl).to_dict()
version = purl_info.get("version")

return [vendor or None, product or None, version or None]


if __name__ == "__main__":
import sys
Expand Down
2 changes: 1 addition & 1 deletion cve_bin_tool/sbom_manager/swid_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def extract(self, swid: str) -> list[str]:
# Format of swid is "URI: <vendor>-<product>-<version>"
item = swid[swid.find(":") + 1 :].split("-")
# As some version numbers have leading 'v', it is removed
return [item[1], item[2].upper().replace("V", "")]
return [item[0].strip(" "), item[1], item[2].upper().replace("V", "")]


if __name__ == "__main__":
Expand Down
88 changes: 88 additions & 0 deletions test/sbom/cyclonedx_bad_cpe22.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
{
"$schema": "http://cyclonedx.org/schema/bom-1.5.schema.json",
"bomFormat": "CycloneDX",
"specVersion": "1.5",
"serialNumber": "urn:uuid:80c1b198-5175-4fda-86c8-1cc725b6c532",
"version": 1,
"metadata": {
"timestamp": "2024-03-30T18:21:29Z",
"tools": {
"components": [
{
"name": "cve-bin-tool",
"version": "3.3rc2",
"type": "application"
}
]
},
"component": {
"type": "application",
"bom-ref": "CDXRef-DOCUMENT",
"name": "SBOM_CVEBINTOOL-product_1-0-0-66_all-deb"
}
},
"components": [
{
"type": "application",
"bom-ref": "1-CVEBINTOOL-product_1-0-0-66_all-deb",
"name": "CVEBINTOOL-product_1-0-0-66_all-deb"
},
{
"type": "library",
"bom-ref": "2-libjpeg",
"name": "libjpeg-novendor",
"version": "8b",
"supplier": {
"name": "ijg"
},
"cpe": "cpe:/a::libjpeg:8b"
},
{
"type": "library",
"bom-ref": "3-libexpat",
"name": "libexpat",
"version": "2.0.1",
"supplier": {
"name": "libexpat project"
},
"cpe": "cpe:/a:libexpat_project::2.0.1"
},
{
"type": "library",
"bom-ref": "4-ncurses",
"name": "ncurses-noversion",
"version": "5.9.noversion",
"supplier": {
"name": "gnu"
},
"cpe": "cpe:/a:gnu:ncurses:"
},
{
"type": "library",
"bom-ref": "5-zlib",
"name": "zlib",
"version": "1.2.3",
"supplier": {
"name": "zlib"
},
"cpe": "cpe:/a:zlib:zlib:1.2.3"
}
],
"dependencies": [
{
"ref": "CDXRef-DOCUMENT",
"dependsOn": [
"1-CVEBINTOOL-product_1-0-0-66_all-deb"
]
},
{
"ref": "1-CVEBINTOOL-product_1-0-0-66_all-deb",
"dependsOn": [
"2-libjpeg",
"3-libexpat",
"4-ncurses",
"5-zlib"
]
}
]
}
Loading