Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Migrate Npm importer to aboutcode pipeline #1574

Merged
merged 5 commits into from
Sep 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions vulnerabilities/importers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
from vulnerabilities.importers import istio
from vulnerabilities.importers import mozilla
from vulnerabilities.importers import nginx
from vulnerabilities.importers import npm
from vulnerabilities.importers import nvd
from vulnerabilities.importers import openssl
from vulnerabilities.importers import oss_fuzz
Expand All @@ -40,13 +39,14 @@
from vulnerabilities.importers import ubuntu_usn
from vulnerabilities.importers import vulnrichment
from vulnerabilities.importers import xen
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline
from vulnerabilities.pipelines import npm_importer
from vulnerabilities.pipelines import pypa_importer

IMPORTERS_REGISTRY = [
nvd.NVDImporter,
github.GitHubAPIImporter,
gitlab.GitLabAPIImporter,
npm.NpmImporter,
nginx.NginxImporter,
pysec.PyPIImporter,
alpine_linux.AlpineImporter,
Expand Down Expand Up @@ -77,6 +77,10 @@
epss.EPSSImporter,
vulnrichment.VulnrichImporter,
pypa_importer.PyPaImporterPipeline,
npm_importer.NpmImporterPipeline,
]

IMPORTERS_REGISTRY = {x.qualified_name: x for x in IMPORTERS_REGISTRY}
IMPORTERS_REGISTRY = {
x.pipeline_id if issubclass(x, VulnerableCodeBaseImporterPipeline) else x.qualified_name: x
for x in IMPORTERS_REGISTRY
}
6 changes: 5 additions & 1 deletion vulnerabilities/improvers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from vulnerabilities.improvers import valid_versions
from vulnerabilities.improvers import vulnerability_kev
from vulnerabilities.improvers import vulnerability_status
from vulnerabilities.pipelines import VulnerableCodePipeline
from vulnerabilities.pipelines import flag_ghost_packages

IMPROVERS_REGISTRY = [
Expand All @@ -34,4 +35,7 @@
flag_ghost_packages.FlagGhostPackagePipeline,
]

IMPROVERS_REGISTRY = {x.qualified_name: x for x in IMPROVERS_REGISTRY}
IMPROVERS_REGISTRY = {
x.pipeline_id if issubclass(x, VulnerableCodePipeline) else x.qualified_name: x
for x in IMPROVERS_REGISTRY
}
4 changes: 2 additions & 2 deletions vulnerabilities/improvers/valid_versions.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,14 +37,14 @@
from vulnerabilities.importers.gitlab import GitLabAPIImporter
from vulnerabilities.importers.istio import IstioImporter
from vulnerabilities.importers.nginx import NginxImporter
from vulnerabilities.importers.npm import NpmImporter
from vulnerabilities.importers.oss_fuzz import OSSFuzzImporter
from vulnerabilities.importers.ruby import RubyImporter
from vulnerabilities.importers.ubuntu import UbuntuImporter
from vulnerabilities.improver import MAX_CONFIDENCE
from vulnerabilities.improver import Improver
from vulnerabilities.improver import Inference
from vulnerabilities.models import Advisory
from vulnerabilities.pipelines.npm_importer import NpmImporterPipeline
from vulnerabilities.utils import AffectedPackage as LegacyAffectedPackage
from vulnerabilities.utils import clean_nginx_git_tag
from vulnerabilities.utils import get_affected_packages_by_patched_package
Expand Down Expand Up @@ -436,7 +436,7 @@ class GitHubBasicImprover(ValidVersionImprover):


class NpmImprover(ValidVersionImprover):
importer = NpmImporter
importer = NpmImporterPipeline
ignorable_versions = []


Expand Down
5 changes: 3 additions & 2 deletions vulnerabilities/management/commands/import.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,14 +57,15 @@ def import_data(self, importers):
failed_importers = []

for importer in importers:
self.stdout.write(f"Importing data using {importer.qualified_name}")
if issubclass(importer, VulnerableCodeBaseImporterPipeline):
self.stdout.write(f"Importing data using {importer.pipeline_id}")
status, error = importer().execute()
if status != 0:
self.stdout.write(error)
failed_importers.append(importer.qualified_name)
failed_importers.append(importer.pipeline_id)
continue

self.stdout.write(f"Importing data using {importer.qualified_name}")
try:
ImportRunner(importer).run()
self.stdout.write(
Expand Down
5 changes: 3 additions & 2 deletions vulnerabilities/management/commands/improve.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,14 +56,15 @@ def improve_data(self, improvers):
failed_improvers = []

for improver in improvers:
self.stdout.write(f"Improving data using {improver.qualified_name}")
if issubclass(improver, VulnerableCodePipeline):
self.stdout.write(f"Improving data using {improver.pipeline_id}")
status, error = improver().execute()
if status != 0:
self.stdout.write(error)
failed_improvers.append(improver.qualified_name)
failed_improvers.append(improver.pipeline_id)
continue

self.stdout.write(f"Improving data using {improver.qualified_name}")
try:
ImproveRunner(improver_class=improver).run()
self.stdout.write(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Generated by Django 4.2.15 on 2024-09-12 12:56

from django.db import migrations

"""
Update the created_by field on Advisory from the old qualified_name
to the new pipeline_id.
"""


def update_created_by(apps, schema_editor):
    """
    Rewrite ``Advisory.created_by`` from the old importer qualified names to
    the new pipeline ids for the npm and PyPA importers.

    ``apps`` is the historical app registry handed over by ``RunPython``;
    ``schema_editor`` is unused.
    """
    # The ids are frozen as literals instead of being read from the pipeline
    # classes: a migration must keep producing the same rows even if those
    # classes are later renamed, moved, or given a different pipeline_id.
    renames = (
        ("vulnerabilities.importers.npm.NpmImporter", "npm_importer"),
        ("vulnerabilities.importers.pypa.PyPaImporter", "pypa_importer"),
    )

    Advisory = apps.get_model("vulnerabilities", "Advisory")
    for old_qualified_name, new_pipeline_id in renames:
        Advisory.objects.filter(created_by=old_qualified_name).update(
            created_by=new_pipeline_id
        )



def reverse_update_created_by(apps, schema_editor):
    """
    Restore ``Advisory.created_by`` from the npm and PyPA pipeline ids back to
    the old importer qualified names.

    ``apps`` is the historical app registry handed over by ``RunPython``;
    ``schema_editor`` is unused.
    """
    # The ids are frozen as literals instead of being read from the pipeline
    # classes: reversing this migration must keep matching the rows it wrote
    # even if those classes are later renamed, moved, or given a different
    # pipeline_id.
    renames = (
        ("npm_importer", "vulnerabilities.importers.npm.NpmImporter"),
        ("pypa_importer", "vulnerabilities.importers.pypa.PyPaImporter"),
    )

    Advisory = apps.get_model("vulnerabilities", "Advisory")
    for pipeline_id, old_qualified_name in renames:
        Advisory.objects.filter(created_by=pipeline_id).update(
            created_by=old_qualified_name
        )


class Migration(migrations.Migration):
    """Data migration that switches ``Advisory.created_by`` to pipeline ids."""

    # Runs after migration 0063 of the ``vulnerabilities`` app.
    dependencies = [
        ("vulnerabilities", "0063_alter_packagechangelog_software_version_and_more"),
    ]

    # Forward: update_created_by; backward: reverse_update_created_by.
    operations = [
        migrations.RunPython(
            update_created_by,
            reverse_code=reverse_update_created_by,
        ),
    ]
20 changes: 12 additions & 8 deletions vulnerabilities/pipelines/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@


class VulnerableCodePipeline(BasePipeline):
pipeline_id = None # Unique Pipeline ID

def log(self, message, level=logging.INFO):
"""Log the given `message` to the current module logger and execution_log."""
now_local = datetime.now(timezone.utc).astimezone()
Expand All @@ -36,11 +38,12 @@ def log(self, message, level=logging.INFO):
self.append_to_log(message)

@classproperty
def qualified_name(cls):
"""
Fully qualified name prefixed with the module name of the pipeline used in logging.
"""
return f"{cls.__module__}.{cls.__qualname__}"
def pipeline_id(cls):
    """
    Return the unique pipeline id declared as a plain ``pipeline_id`` class
    attribute on ``cls`` or the nearest class in its MRO.

    Raise NotImplementedError when that declaration is None or an empty
    string, or when no class in the MRO declares one.
    """
    # Read the class ``__dict__`` entries directly. This getter is itself
    # bound under the name ``pipeline_id``, so evaluating ``cls.pipeline_id``
    # here would call the getter again and recurse without end on a class
    # that has no plain override.
    # NOTE(review): a subclass that assigns ``pipeline_id`` as a plain
    # attribute (even None) is presumably resolved before this getter by
    # normal attribute lookup, so this check only runs where no such override
    # exists -- confirm against the classproperty implementation in use.
    for klass in cls.__mro__:
        namespace = vars(klass)
        if "pipeline_id" not in namespace:
            continue
        declared = namespace["pipeline_id"]
        if hasattr(declared, "__get__"):
            # A descriptor (such as this property), not a declared id.
            continue
        if declared is None or declared == "":
            break
        return declared
    raise NotImplementedError("pipeline_id is not defined or is empty")


class VulnerableCodeBaseImporterPipeline(VulnerableCodePipeline):
Expand All @@ -52,6 +55,7 @@ class VulnerableCodeBaseImporterPipeline(VulnerableCodePipeline):
Also override the ``steps`` and ``advisory_confidence`` as needed.
"""

pipeline_id = None # Unique Pipeline ID, this should be the name of pipeline module.
license_url = None
spdx_license_expression = None
repo_url = None
Expand Down Expand Up @@ -89,7 +93,7 @@ def collect_and_store_advisories(self):
for advisory in progress.iter(self.collect_advisories()):
if _obj := insert_advisory(
advisory=advisory,
pipeline_name=self.qualified_name,
pipeline_id=self.pipeline_id,
logger=self.log,
):
collected_advisory_count += 1
Expand All @@ -98,7 +102,7 @@ def collect_and_store_advisories(self):

def import_new_advisories(self):
new_advisories = Advisory.objects.filter(
created_by=self.qualified_name,
created_by=self.pipeline_id,
date_imported__isnull=True,
)

Expand All @@ -119,7 +123,7 @@ def import_advisory(self, advisory: Advisory) -> int:
try:
import_advisory(
advisory=advisory,
pipeline_name=self.qualified_name,
pipeline_id=self.pipeline_id,
confidence=self.advisory_confidence,
logger=self.log,
)
Expand Down
2 changes: 2 additions & 0 deletions vulnerabilities/pipelines/flag_ghost_packages.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
class FlagGhostPackagePipeline(VulnerableCodePipeline):
"""Detect and flag packages that do not exist upstream."""

pipeline_id = "flag_ghost_packages"

@classmethod
def steps(cls):
return (cls.flag_ghost_packages,)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,44 +11,58 @@

from pathlib import Path
from typing import Iterable
from typing import List

import pytz
from dateutil.parser import parse
from fetchcode.vcs import fetch_via_vcs
from packageurl import PackageURL
from univers.version_range import NpmVersionRange

from vulnerabilities.importer import AdvisoryData
from vulnerabilities.importer import AffectedPackage
from vulnerabilities.importer import Importer
from vulnerabilities.importer import Reference
from vulnerabilities.importer import VulnerabilitySeverity
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline
from vulnerabilities.severity_systems import CVSSV2
from vulnerabilities.severity_systems import CVSSV3
from vulnerabilities.utils import build_description
from vulnerabilities.utils import load_json


class NpmImporter(Importer):
class NpmImporterPipeline(VulnerableCodeBaseImporterPipeline):
"""Collect advisories from nodejs GitHub repository."""

pipeline_id = "npm_importer"

spdx_license_expression = "MIT"
license_url = "https://github.com/nodejs/security-wg/blob/main/LICENSE.md"
repo_url = "git+https://github.com/nodejs/security-wg"
importer_name = "Npm Importer"

def advisory_data(self) -> Iterable[AdvisoryData]:
try:
self.clone(self.repo_url)
path = Path(self.vcs_response.dest_dir)
@classmethod
def steps(cls):
    """Return the pipeline steps in execution order: clone, collect, import, clean up."""
    ordered_steps = (
        cls.clone,
        cls.collect_and_store_advisories,
        cls.import_new_advisories,
        cls.clean_downloads,
    )
    return ordered_steps

def clone(self):
    """Fetch the repository at ``repo_url`` and store the VCS response on ``self.vcs_response``."""
    repository_url = self.repo_url
    self.log(f"Cloning `{repository_url}`")
    self.vcs_response = fetch_via_vcs(repository_url)

vuln = path / "vuln"
npm_vulns = vuln / "npm"
for file in npm_vulns.glob("*.json"):
yield from self.to_advisory_data(file)
finally:
if self.vcs_response:
self.vcs_response.delete()
def advisories_count(self):
    """Return how many ``*.json`` files sit directly in ``vuln/npm`` of the cloned repository."""
    checkout_root = Path(self.vcs_response.dest_dir)
    advisory_files = checkout_root.joinpath("vuln", "npm").glob("*.json")
    return len(list(advisory_files))

def to_advisory_data(self, file: Path) -> List[AdvisoryData]:
def collect_advisories(self) -> Iterable[AdvisoryData]:
    """Yield every advisory parsed from the ``*.json`` files in ``vuln/npm`` of the cloned repository."""
    npm_advisories = Path(self.vcs_response.dest_dir).joinpath("vuln", "npm")

    for json_file in npm_advisories.glob("*.json"):
        for advisory_data in self.to_advisory_data(json_file):
            yield advisory_data

def to_advisory_data(self, file: Path) -> Iterable[AdvisoryData]:
data = load_json(file)
id = data.get("id")
description = data.get("overview") or ""
Expand Down Expand Up @@ -144,3 +158,8 @@ def get_affected_package(self, data, package_name):
affected_version_range=affected_version_range,
fixed_version=fixed_version,
)

def clean_downloads(self):
    """Delete the cloned repository, if one was fetched."""
    # ``vcs_response`` is only assigned by ``clone``; fall back to None so
    # this step is a no-op rather than an AttributeError when it runs
    # without a prior successful clone.
    vcs_response = getattr(self, "vcs_response", None)
    if vcs_response:
        self.log("Removing cloned repository")
        vcs_response.delete()
6 changes: 4 additions & 2 deletions vulnerabilities/pipelines/pypa_importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
from fetchcode.vcs import fetch_via_vcs

from vulnerabilities.importer import AdvisoryData
from vulnerabilities.importers.osv import parse_advisory_data
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline
from vulnerabilities.utils import get_advisory_url

Expand All @@ -24,6 +23,8 @@
class PyPaImporterPipeline(VulnerableCodeBaseImporterPipeline):
"""Collect advisories from PyPA GitHub repository."""

pipeline_id = "pypa_importer"

spdx_license_expression = "CC-BY-4.0"
license_url = "https://github.com/pypa/advisory-database/blob/main/LICENSE"
repo_url = "git+https://github.com/pypa/advisory-database"
Expand All @@ -47,9 +48,10 @@ def advisories_count(self):
return sum(1 for _ in vulns_directory.rglob("*.yaml"))

def collect_advisories(self) -> Iterable[AdvisoryData]:
from vulnerabilities.importers.osv import parse_advisory_data

base_directory = Path(self.vcs_response.dest_dir)
vulns_directory = base_directory / "vulns"
self.advisories_count = sum(1 for _ in vulns_directory.rglob("*.yaml"))

for advisory in vulns_directory.rglob("*.yaml"):
advisory_url = get_advisory_url(
Expand Down
10 changes: 5 additions & 5 deletions vulnerabilities/pipes/advisory.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from vulnerabilities.models import Weakness


def insert_advisory(advisory: AdvisoryData, pipeline_name: str, logger: Callable = None):
def insert_advisory(advisory: AdvisoryData, pipeline_id: str, logger: Callable = None):
obj = None
try:
obj, _ = Advisory.objects.get_or_create(
Expand All @@ -38,7 +38,7 @@ def insert_advisory(advisory: AdvisoryData, pipeline_name: str, logger: Callable
weaknesses=advisory.weaknesses,
url=advisory.url,
defaults={
"created_by": pipeline_name,
"created_by": pipeline_id,
"date_collected": datetime.now(timezone.utc),
},
)
Expand All @@ -55,7 +55,7 @@ def insert_advisory(advisory: AdvisoryData, pipeline_name: str, logger: Callable
@transaction.atomic
def import_advisory(
advisory: Advisory,
pipeline_name: str,
pipeline_id: str,
confidence: int = MAX_CONFIDENCE,
logger: Callable = None,
):
Expand Down Expand Up @@ -141,7 +141,7 @@ def import_advisory(
PackageRelatedVulnerability(
vulnerability=vulnerability,
package=vulnerable_package,
created_by=pipeline_name,
created_by=pipeline_id,
confidence=confidence,
fix=False,
).update_or_create(advisory=advisory)
Expand All @@ -151,7 +151,7 @@ def import_advisory(
PackageRelatedVulnerability(
vulnerability=vulnerability,
package=fixed_package,
created_by=pipeline_name,
created_by=pipeline_id,
confidence=confidence,
fix=True,
).update_or_create(advisory=advisory)
Expand Down
Loading
Loading