Skip to content

Commit

Permalink
Migrate NVD importer to aboutcode pipeline
Browse files Browse the repository at this point in the history
Signed-off-by: Keshav Priyadarshi <git@keshav.space>
  • Loading branch information
keshav-space committed Sep 11, 2024
1 parent b342145 commit 28015b8
Show file tree
Hide file tree
Showing 6 changed files with 72 additions and 34 deletions.
4 changes: 2 additions & 2 deletions vulnerabilities/importers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
from vulnerabilities.importers import mozilla
from vulnerabilities.importers import nginx
from vulnerabilities.importers import npm
from vulnerabilities.importers import nvd
from vulnerabilities.importers import openssl
from vulnerabilities.importers import oss_fuzz
from vulnerabilities.importers import postgresql
Expand All @@ -39,10 +38,11 @@
from vulnerabilities.importers import ubuntu_usn
from vulnerabilities.importers import vulnrichment
from vulnerabilities.importers import xen
from vulnerabilities.pipelines import nvd_importer
from vulnerabilities.pipelines import pypa_importer

IMPORTERS_REGISTRY = [
nvd.NVDImporter,
nvd_importer.NVDImporterPipeline,
github.GitHubAPIImporter,
gitlab.GitLabAPIImporter,
npm.NpmImporter,
Expand Down
4 changes: 2 additions & 2 deletions vulnerabilities/improvers/vulnerability_status.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,14 @@
from django.db.models.query import QuerySet

from vulnerabilities.importer import AdvisoryData
from vulnerabilities.importers.nvd import NVDImporter
from vulnerabilities.improver import Improver
from vulnerabilities.improver import Inference
from vulnerabilities.models import Advisory
from vulnerabilities.models import Alias
from vulnerabilities.models import Vulnerability
from vulnerabilities.models import VulnerabilityChangeLog
from vulnerabilities.models import VulnerabilityStatusType
from vulnerabilities.pipelines.nvd_importer import NVDImporterPipeline
from vulnerabilities.utils import fetch_response
from vulnerabilities.utils import get_item

Expand All @@ -38,7 +38,7 @@ class VulnerabilityStatusImprover(Improver):
@property
def interesting_advisories(self) -> QuerySet:
return (
Advisory.objects.filter(Q(created_by=NVDImporter.qualified_name))
Advisory.objects.filter(Q(created_by=NVDImporterPipeline.qualified_name))
.distinct("aliases")
.paginated()
)
Expand Down
7 changes: 6 additions & 1 deletion vulnerabilities/pipelines/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,12 @@ def advisories_count(self) -> int:

def collect_and_store_advisories(self):
collected_advisory_count = 0
progress = LoopProgress(total_iterations=self.advisories_count(), logger=self.log)
estimated_advisory_count = self.advisories_count()

if estimated_advisory_count > 0:
self.log(f"Collecting {estimated_advisory_count:,d} advisories")

progress = LoopProgress(total_iterations=estimated_advisory_count, logger=self.log)
for advisory in progress.iter(self.collect_advisories()):
if _obj := insert_advisory(
advisory=advisory,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,22 +9,27 @@

import gzip
import json
import logging
from datetime import date
from traceback import format_exc as traceback_format_exc
from typing import Iterable

import attr
import requests
from dateutil import parser as dateparser

from vulnerabilities import severity_systems
from vulnerabilities.importer import AdvisoryData
from vulnerabilities.importer import Importer
from vulnerabilities.importer import Reference
from vulnerabilities.importer import VulnerabilitySeverity
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline
from vulnerabilities.utils import get_cwe_id
from vulnerabilities.utils import get_item


class NVDImporter(Importer):
class NVDImporterPipeline(VulnerableCodeBaseImporterPipeline):
"""Collect advisories from NVD."""

# See https://github.com/nexB/vulnerablecode/issues/665 for follow up
spdx_license_expression = (
"LicenseRef-scancode-us-govt-public-domain AND LicenseRef-scancode-cve-tou"
Expand Down Expand Up @@ -61,19 +66,46 @@ class NVDImporter(Importer):
"""
importer_name = "NVD Importer"

def advisory_data(self):
for _year, cve_data in fetch_cve_data_1_1():
@classmethod
def steps(cls):
    """
    Return the tuple of step callables for this pipeline: first
    ``collect_and_store_advisories``, then ``import_new_advisories``.
    """
    ordered_steps = (
        cls.collect_and_store_advisories,
        cls.import_new_advisories,
    )
    return ordered_steps

def advisories_count(self):
    """
    Return the number of advisories reported by the NVD CVE API 2.0, read
    from the ``totalResults`` field of a one-result query.

    Return 0 when the count cannot be obtained. Any request failure
    (HTTP error status, connection error, timeout) or an invalid JSON body
    is logged at ERROR level instead of being raised.
    """
    url = "https://services.nvd.nist.gov/rest/json/cves/2.0?resultsPerPage=1"

    try:
        # Without a timeout ``requests`` may wait forever on a stalled server.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as err:
        # RequestException covers HTTPError, ConnectionError and Timeout;
        # ValueError covers a response body that is not valid JSON.
        self.log(
            f"Error fetching NVD advisory count: {err} \n {traceback_format_exc()}",
            level=logging.ERROR,
        )
        return 0

    return data.get("totalResults", 0)

def collect_advisories(self) -> Iterable[AdvisoryData]:
    """
    Yield the advisories built by ``to_advisories`` from each CVE data
    mapping returned by ``fetch_cve_data_1_1``.
    """
    yearly_feeds = fetch_cve_data_1_1(logger=self.log)
    # Each item is a (year, cve_data) pair; the year is not needed here.
    for _, feed in yearly_feeds:
        yield from to_advisories(cve_data=feed)


# Isolating network calls for simplicity of testing
def fetch(url):
def fetch(url, logger=None):
    """
    Return the data parsed from the gzip-compressed JSON file at ``url``.

    If a ``logger`` callable is provided, call it with a message naming the
    URL being fetched. Raise ``requests.HTTPError`` when the server answers
    with a 4xx/5xx status.
    """
    if logger:
        logger(f"Fetching `{url}`")
    # The timeout keeps a stalled connection from blocking forever.
    response = requests.get(url, timeout=60)
    # Fail on the HTTP status: otherwise an error page would be handed to
    # gzip.decompress and surface as a misleading "not a gzipped file" error.
    response.raise_for_status()
    data = gzip.decompress(response.content)
    return json.loads(data)


def fetch_cve_data_1_1(starting_year=2002):
def fetch_cve_data_1_1(starting_year=2002, logger=None):
"""
Yield tuples of (year, lists of CVE mappings) from the NVD, one for each
year since ``starting_year`` defaulting to 2002.
Expand All @@ -82,7 +114,7 @@ def fetch_cve_data_1_1(starting_year=2002):
# NVD json feeds start from 2002.
for year in range(starting_year, current_year + 1):
download_url = f"https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-{year}.json.gz"
yield year, fetch(url=download_url)
yield year, fetch(url=download_url, logger=logger)


def to_advisories(cve_data):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,12 @@
#

import json
import os
from pathlib import Path

from vulnerabilities.importers import nvd
from vulnerabilities.pipelines import nvd_importer
from vulnerabilities.tests.util_tests import VULNERABLECODE_REGEN_TEST_FIXTURES as REGEN

BASE_DIR = os.path.dirname(os.path.abspath(__file__))
TEST_DATA = os.path.join(BASE_DIR, "test_data/nvd/nvd_test.json")
REJECTED_CVE = os.path.join(BASE_DIR, "test_data/nvd/rejected_nvd.json")
TEST_DATA = Path(__file__).parent.parent / "test_data" / "nvd"


def load_test_data(file):
Expand All @@ -37,10 +35,11 @@ def sorted_advisory_data(advisory_data):


def test_to_advisories_skips_hardware(regen=REGEN):
expected_file = os.path.join(BASE_DIR, "test_data/nvd/nvd-expected.json")
expected_file = TEST_DATA / "nvd-expected.json"

test_data = load_test_data(file=TEST_DATA)
result = [data.to_dict() for data in nvd.to_advisories(test_data)]
test_file = TEST_DATA / "nvd_test.json"
test_data = load_test_data(file=test_file)
result = [data.to_dict() for data in nvd_importer.to_advisories(test_data)]
result = sorted_advisory_data(result)

if regen:
Expand All @@ -56,10 +55,11 @@ def test_to_advisories_skips_hardware(regen=REGEN):


def test_to_advisories_marks_rejected_cve(regen=REGEN):
expected_file = os.path.join(BASE_DIR, "test_data/nvd/nvd-rejected-expected.json")
expected_file = TEST_DATA / "nvd-rejected-expected.json"

test_data = load_test_data(file=REJECTED_CVE)
result = [data.to_dict() for data in nvd.to_advisories(test_data)]
test_file = TEST_DATA / "rejected_nvd.json"
test_data = load_test_data(file=test_file)
result = [data.to_dict() for data in nvd_importer.to_advisories(test_data)]
result = sorted_advisory_data(result)

if regen:
Expand Down Expand Up @@ -168,14 +168,16 @@ def test_CveItem_cpes():
"cpe:2.3:a:csilvers:gperftools:*:*:*:*:*:*:*:*",
]

found_cpes = nvd.CveItem(cve_item=get_test_cve_item()).cpes
found_cpes = nvd_importer.CveItem(cve_item=get_test_cve_item()).cpes
assert found_cpes == expected_cpes


def test_is_related_to_hardware():
assert nvd.is_related_to_hardware("cpe:2.3:h:csilvers:gperftools:0.2:*:*:*:*:*:*:*")
assert not nvd.is_related_to_hardware("cpe:2.3:a:csilvers:gperftools:0.1:*:*:*:*:*:*:*")
assert not nvd.is_related_to_hardware("cpe:2.3:a:csilvers:gperftools:*:*:*:*:*:*:*:*")
assert nvd_importer.is_related_to_hardware("cpe:2.3:h:csilvers:gperftools:0.2:*:*:*:*:*:*:*")
assert not nvd_importer.is_related_to_hardware(
"cpe:2.3:a:csilvers:gperftools:0.1:*:*:*:*:*:*:*"
)
assert not nvd_importer.is_related_to_hardware("cpe:2.3:a:csilvers:gperftools:*:*:*:*:*:*:*:*")


def test_CveItem_summary_with_single_summary():
Expand All @@ -186,7 +188,7 @@ def test_CveItem_summary_with_single_summary():
"be allocated than expected."
)

assert nvd.CveItem(cve_item=get_test_cve_item()).summary == expected_summary
assert nvd_importer.CveItem(cve_item=get_test_cve_item()).summary == expected_summary


def test_CveItem_reference_urls():
Expand All @@ -195,4 +197,4 @@ def test_CveItem_reference_urls():
"http://kqueue.org/blog/2012/03/05/memory-allocator-security-revisited/",
]

assert nvd.CveItem(cve_item=get_test_cve_item()).reference_urls == expected_urls
assert nvd_importer.CveItem(cve_item=get_test_cve_item()).reference_urls == expected_urls
9 changes: 4 additions & 5 deletions vulnerabilities/tests/test_vulnerability_status_improver.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,12 @@

import pytest

from vulnerabilities.importers.nvd import NVDImporter
from vulnerabilities.improvers.vulnerability_status import VulnerabilityStatusImprover
from vulnerabilities.improvers.vulnerability_status import get_status_from_api
from vulnerabilities.models import Advisory
from vulnerabilities.models import Alias
from vulnerabilities.models import Vulnerability
from vulnerabilities.models import VulnerabilityStatusType
from vulnerabilities.pipelines.nvd_importer import NVDImporterPipeline

BASE_DIR = os.path.dirname(os.path.abspath(__file__))

Expand All @@ -34,13 +33,13 @@
def test_interesting_advisories():
Advisory.objects.create(
aliases=["CVE-1"],
created_by=NVDImporter.qualified_name,
created_by=NVDImporterPipeline.qualified_name,
summary="1",
date_collected=datetime.now(),
)
Advisory.objects.create(
aliases=["CVE-1"],
created_by=NVDImporter.qualified_name,
created_by=NVDImporterPipeline.qualified_name,
summary="2",
date_collected=datetime.now(),
)
Expand All @@ -55,7 +54,7 @@ def test_improver_end_to_end(mock_response):
mock_response.return_value = response
adv = Advisory.objects.create(
aliases=["CVE-2023-35866"],
created_by=NVDImporter.qualified_name,
created_by=NVDImporterPipeline.qualified_name,
summary="1",
date_collected=datetime.now(),
)
Expand Down

0 comments on commit 28015b8

Please sign in to comment.