Skip to content

Commit

Permalink
Merge branch 'main' into gsd
Browse files Browse the repository at this point in the history
  • Loading branch information
TG1999 authored Aug 6, 2024
2 parents 6f26ba2 + 53b84d1 commit 1f97d34
Show file tree
Hide file tree
Showing 119 changed files with 16,537 additions and 1,584 deletions.
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ toml==0.10.2
tomli==2.0.1
traitlets==5.1.1
typing_extensions==4.1.1
univers==30.11.0
univers==30.12.0
urllib3==1.26.19
wcwidth==0.2.5
websocket-client==0.59.0
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ install_requires =

#essentials
packageurl-python>=0.10.5rc1
univers>=30.11.0
univers>=30.12.0
license-expression>=21.6.14

# file and data formats
Expand Down
11 changes: 9 additions & 2 deletions vulnerabilities/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,14 @@
class VulnerabilitySeveritySerializer(serializers.ModelSerializer):
class Meta:
model = VulnerabilitySeverity
fields = ["value", "scoring_system", "scoring_elements"]
fields = ["value", "scoring_system", "scoring_elements", "published_at"]

def to_representation(self, instance):
data = super().to_representation(instance)
published_at = data.get("published_at", None)
if not published_at:
data.pop("published_at")
return data


class VulnerabilityReferenceSerializer(serializers.ModelSerializer):
Expand All @@ -47,7 +54,7 @@ class VulnerabilityReferenceSerializer(serializers.ModelSerializer):

class Meta:
model = VulnerabilityReference
fields = ["reference_url", "reference_id", "scores", "url"]
fields = ["reference_url", "reference_id", "reference_type", "scores", "url"]


class BaseResourceSerializer(serializers.HyperlinkedModelSerializer):
Expand Down
1 change: 1 addition & 0 deletions vulnerabilities/import_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,7 @@ def process_inferences(inferences: List[Inference], advisory: Advisory, improver
defaults={
"value": str(severity.value),
"scoring_elements": str(severity.scoring_elements),
"published_at": str(severity.published_at),
},
)
if updated:
Expand Down
16 changes: 15 additions & 1 deletion vulnerabilities/importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,12 +52,17 @@ class VulnerabilitySeverity:
system: ScoringSystem
value: str
scoring_elements: str = ""
published_at: Optional[datetime.datetime] = None

def to_dict(self):
published_at_dict = (
{"published_at": self.published_at.isoformat()} if self.published_at else {}
)
return {
"system": self.system.identifier,
"value": self.value,
"scoring_elements": self.scoring_elements,
**published_at_dict,
}

@classmethod
Expand All @@ -70,12 +75,14 @@ def from_dict(cls, severity: dict):
system=SCORING_SYSTEMS[severity["system"]],
value=severity["value"],
scoring_elements=severity.get("scoring_elements", ""),
published_at=severity.get("published_at"),
)


@dataclasses.dataclass(order=True)
class Reference:
reference_id: str = ""
reference_type: str = ""
url: str = ""
severities: List[VulnerabilitySeverity] = dataclasses.field(default_factory=list)

Expand All @@ -85,11 +92,17 @@ def __post_init__(self):

def normalized(self):
severities = sorted(self.severities)
return Reference(reference_id=self.reference_id, url=self.url, severities=severities)
return Reference(
reference_id=self.reference_id,
url=self.url,
severities=severities,
reference_type=self.reference_type,
)

def to_dict(self):
return {
"reference_id": self.reference_id,
"reference_type": self.reference_type,
"url": self.url,
"severities": [severity.to_dict() for severity in self.severities],
}
Expand All @@ -98,6 +111,7 @@ def to_dict(self):
def from_dict(cls, ref: dict):
return cls(
reference_id=ref["reference_id"],
reference_type=ref["reference_type"],
url=ref["url"],
severities=[
VulnerabilitySeverity.from_dict(severity) for severity in ref["severities"]
Expand Down
2 changes: 2 additions & 0 deletions vulnerabilities/importers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from vulnerabilities.importers import debian
from vulnerabilities.importers import debian_oval
from vulnerabilities.importers import elixir_security
from vulnerabilities.importers import epss
from vulnerabilities.importers import fireeye
from vulnerabilities.importers import gentoo
from vulnerabilities.importers import github
Expand Down Expand Up @@ -71,6 +72,7 @@
oss_fuzz.OSSFuzzImporter,
ruby.RubyImporter,
github_osv.GithubOSVImporter,
epss.EPSSImporter,
]

IMPORTERS_REGISTRY = {x.qualified_name: x for x in IMPORTERS_REGISTRY}
67 changes: 67 additions & 0 deletions vulnerabilities/importers/epss.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# VulnerableCode is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/nexB/vulnerablecode for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#
import csv
import gzip
import logging
import urllib.request
from datetime import datetime
from typing import Iterable

from vulnerabilities import severity_systems
from vulnerabilities.importer import AdvisoryData
from vulnerabilities.importer import Importer
from vulnerabilities.importer import Reference
from vulnerabilities.importer import VulnerabilitySeverity

logger = logging.getLogger(__name__)


class EPSSImporter(Importer):
    """
    Exploit Prediction Scoring System (EPSS) Importer.

    Download the gzipped CSV at ``advisory_url`` and yield one AdvisoryData
    per valid ``cve,score,percentile`` row. The score is stored as the
    severity value, the percentile as its scoring elements, and the score
    date from the first line of the file as ``published_at``.
    """

    advisory_url = "https://epss.cyentia.com/epss_scores-current.csv.gz"
    spdx_license_expression = "unknown"
    importer_name = "EPSS Importer"

    def advisory_data(self) -> Iterable[AdvisoryData]:
        """
        Yield an AdvisoryData for each well-formed row of the EPSS CSV.

        Rows that do not have exactly three non-empty fields are logged and
        skipped. A missing or malformed metadata line still raises, since no
        publication date can be derived without it.
        """
        # Read everything up front and close both the HTTP response and the
        # gzip stream before yielding, so no connection stays open while the
        # caller consumes this generator.
        with urllib.request.urlopen(self.advisory_url) as response:
            with gzip.open(response, "rb") as f:
                lines = [line.decode("utf-8") for line in f]

        epss_reader = csv.reader(lines)

        # First line is metadata, for example:
        # '#model_version:v2023.03.01,score_date:2024-05-19T00:00:00+0000'
        _model_version, score_date = next(epss_reader)
        # Keep only what follows the 'score_date:' label.
        _, _, score_timestamp = score_date.partition(":")
        published_at = datetime.strptime(score_timestamp, "%Y-%m-%dT%H:%M:%S%z")

        # Skip the column header row; tolerate a file that ends right after
        # the metadata line instead of failing inside the generator.
        next(epss_reader, None)

        for epss_row in epss_reader:
            # Check the shape BEFORE unpacking: a blank or short row would
            # otherwise raise ValueError and abort the whole import.
            if len(epss_row) != 3 or not all(epss_row):
                logger.error(f"Invalid epss row: {epss_row}")
                continue

            cve, score, percentile = epss_row

            severity = VulnerabilitySeverity(
                system=severity_systems.EPSS,
                value=score,
                scoring_elements=percentile,
                published_at=published_at,
            )

            reference = Reference(
                url=f"https://api.first.org/data/v1/epss?cve={cve}",
                severities=[severity],
            )

            yield AdvisoryData(
                aliases=[cve],
                references=[reference],
                url=self.advisory_url,
            )
4 changes: 2 additions & 2 deletions vulnerabilities/importers/fireeye.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,9 +89,9 @@ def get_references(references):
"""
Return a list of Reference from a list of URL reference in md format
>>> get_references(["- http://1-4a.com/cgi-bin/alienform/af.cgi"])
[Reference(reference_id='', url='http://1-4a.com/cgi-bin/alienform/af.cgi', severities=[])]
[Reference(reference_id='', reference_type='', url='http://1-4a.com/cgi-bin/alienform/af.cgi', severities=[])]
>>> get_references(["- [Mitre CVE-2021-42712](https://www.cve.org/CVERecord?id=CVE-2021-42712)"])
[Reference(reference_id='', url='https://www.cve.org/CVERecord?id=CVE-2021-42712', severities=[])]
[Reference(reference_id='', reference_type='', url='https://www.cve.org/CVERecord?id=CVE-2021-42712', severities=[])]
"""
urls = []
for ref in references:
Expand Down
5 changes: 3 additions & 2 deletions vulnerabilities/importers/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
"PIP": "pypi",
"RUBYGEMS": "gem",
"NPM": "npm",
"RUST": "cargo",
# "GO": "golang",
}

Expand All @@ -47,7 +48,7 @@
# TODO: We will try to gather more info from GH API
# Check https://github.com/nexB/vulnerablecode/issues/1039#issuecomment-1366458885
# Check https://github.com/nexB/vulnerablecode/issues/645
# set of all possible values of first '%s' = {'MAVEN','COMPOSER', 'NUGET', 'RUBYGEMS', 'PYPI', 'NPM'}
# set of all possible values of first '%s' = {'MAVEN','COMPOSER', 'NUGET', 'RUBYGEMS', 'PYPI', 'NPM', 'RUST'}
# second '%s' is interesting, it will have the value '' for the first request,
GRAPHQL_QUERY_TEMPLATE = """
query{
Expand Down Expand Up @@ -139,7 +140,7 @@ def get_purl(pkg_type: str, github_name: str) -> Optional[PackageURL]:
vendor, _, name = github_name.partition("/")
return PackageURL(type=pkg_type, namespace=vendor, name=name)

if pkg_type in ("nuget", "pypi", "gem", "golang", "npm"):
if pkg_type in ("nuget", "pypi", "gem", "golang", "npm", "cargo"):
return PackageURL(type=pkg_type, name=github_name)

logger.error(f"get_purl: Unknown package type {pkg_type}")
Expand Down
5 changes: 4 additions & 1 deletion vulnerabilities/improve_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,12 +98,14 @@ def process_inferences(

reference = VulnerabilityReference.objects.get_or_none(
reference_id=ref.reference_id,
reference_type=ref.reference_type,
url=ref.url,
)

if not reference:
reference = create_valid_vulnerability_reference(
reference_id=ref.reference_id,
reference_type=ref.reference_type,
url=ref.url,
)
if not reference:
Expand Down Expand Up @@ -167,14 +169,15 @@ def process_inferences(
return inferences_processed_count


def create_valid_vulnerability_reference(url, reference_id=None):
def create_valid_vulnerability_reference(url, reference_type="", reference_id=None):
"""
Create and return a new validated VulnerabilityReference from a
``url`` and ``reference_id``.
Return None and log a warning if this is not a valid reference.
"""
reference = VulnerabilityReference(
reference_id=reference_id,
reference_type=reference_type,
url=url,
)

Expand Down
133 changes: 133 additions & 0 deletions vulnerabilities/management/commands/export.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# VulnerableCode is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/nexB/vulnerablecode for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#
import logging
import os
from hashlib import sha512
from pathlib import Path

import saneyaml
from django.core.management.base import BaseCommand
from django.core.management.base import CommandError
from packageurl import PackageURL

from vulnerabilities.models import Package

logger = logging.getLogger(__name__)


class Command(BaseCommand):
    """
    Management command that writes package and vulnerability data as YAML
    files below an existing directory given as the ``path`` argument.
    """

    help = "export vulnerablecode data"

    def add_arguments(self, parser):
        # Single positional argument: the directory to export into.
        parser.add_argument("path")

    def handle(self, *args, **options):
        """
        Validate the ``path`` option and run the export.

        Raise CommandError when the path is empty or is not an existing
        directory.
        """
        path = options["path"]
        # Reject an empty value explicitly: Path("") resolves to the current
        # directory, and skipping the assignment left ``git_path`` unbound.
        git_path = Path(path) if path else None
        if git_path is None or not git_path.is_dir():
            raise CommandError("Please enter a valid path")

        self.export_data(git_path)

        self.stdout.write(self.style.SUCCESS("Successfully exported vulnerablecode data"))

    def export_data(self, git_path):
        """
        export vulnerablecode data
        by running `python manage.py export /path/vulnerablecode-data`

        Files are collected in memory per package type and written under
        ``git_path`` once that type has been fully processed.
        """
        self.stdout.write("Exporting vulnerablecode data")

        ecosystems = [pkg.type for pkg in Package.objects.distinct("type")]

        for ecosystem in ecosystems:
            package_files = {}  # {"package path": "data" }
            vul_files = {}  # {"vulnerability path": "data" }

            for purl in (
                Package.objects.filter(type=ecosystem)
                .prefetch_related("vulnerabilities")
                .paginated()
            ):
                # Version-less purl: all versions of one package share a file.
                purl_without_version = PackageURL(
                    type=purl.type,
                    namespace=purl.namespace,
                    name=purl.name,
                )

                # ./aboutcode-packages-ed5/maven/org.apache.log4j/log4j-core/versions/vulnerabilities.yml
                pkg_filepath = (
                    f"./aboutcode-packages-{get_purl_hash(purl_without_version)}/{purl.type}/{purl.namespace}/{purl.name}"
                    f"/versions/vulnerabilities.yml"
                )

                package_data = {
                    "purl": str(purl),
                    "affected_by_vulnerabilities": [
                        vuln.vulnerability_id for vuln in purl.affected_by
                    ],
                    "fixing_vulnerabilities": [vuln.vulnerability_id for vuln in purl.fixing],
                }

                if pkg_filepath in package_files:
                    package_files[pkg_filepath]["versions"].append(package_data)
                else:
                    package_files[pkg_filepath] = {
                        "package": str(purl_without_version),
                        "versions": [package_data],
                    }

                for vul in purl.vulnerabilities.all():
                    vulnerability_id = vul.vulnerability_id
                    # ./aboutcode-vulnerabilities-12/34/VCID-1223-3434-34343/VCID-1223-3434-34343.yml
                    vul_filepath = (
                        f"./aboutcode-vulnerabilities-{vulnerability_id[5:7]}/{vulnerability_id[10:12]}"
                        f"/{vulnerability_id}/{vulnerability_id}.yml"
                    )
                    # A vulnerability seen through several packages maps to
                    # the same path, so later entries overwrite earlier ones.
                    vul_files[vul_filepath] = {
                        "vulnerability_id": vul.vulnerability_id,
                        "aliases": [alias.alias for alias in vul.get_aliases],
                        "summary": vul.summary,
                        "severities": list(vul.severities.values()),
                        "references": list(vul.references.values()),
                        "weaknesses": [
                            "CWE-" + str(weakness["cwe_id"]) for weakness in vul.weaknesses.values()
                        ],
                    }

            for items in [package_files, vul_files]:
                for filepath, data in items.items():
                    create_file(filepath, git_path, data)

            self.stdout.write(f"Successfully exported {ecosystem} data")


def create_file(filepath, git_path, data):
    """
    Dump ``data`` as YAML into ``filepath`` resolved below ``git_path``,
    creating any missing parent directories first. Example relative paths:
    ./aboutcode-vulnerabilities-12/34/VCID-1223-3434-34343/VCID-1223-3434-34343.yml
    ./aboutcode-packages-ed5/maven/org.apache.log4j/log4j-core/versions/vulnerabilities.yml
    ./aboutcode-packages-ed5/maven/org.apache.log4j/log4j-core/versions/1.2.3/vulnerabilities.yml
    """
    target = git_path.joinpath(filepath)
    # Make sure the directory tree exists before serializing and writing.
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text(saneyaml.dump(data), encoding="utf-8")


def get_purl_hash(purl: PackageURL, length: int = 3) -> str:
    """
    Return the first ``length`` characters, lower cased, of the hexadecimal
    SHA-512 digest of the UTF-8 encoded string form of ``purl``.
    https://github.com/nexB/purldb/pull/235/files#diff-a1fd023bd42d73f56019d540f38be711255403547add15108540d70f9948dd40R154
    """
    hex_digest = sha512(str(purl).encode("utf-8")).hexdigest()
    return hex_digest[:length].lower()
Loading

0 comments on commit 1f97d34

Please sign in to comment.