2 changes: 2 additions & 0 deletions vulnerabilities/importers/__init__.py
@@ -47,6 +47,7 @@
from vulnerabilities.pipelines.v2_importers import (
elixir_security_importer as elixir_security_importer_v2,
)
from vulnerabilities.pipelines.v2_importers import euvd_importer as euvd_importer_v2
from vulnerabilities.pipelines.v2_importers import github_osv_importer as github_osv_importer_v2
from vulnerabilities.pipelines.v2_importers import gitlab_importer as gitlab_importer_v2
from vulnerabilities.pipelines.v2_importers import istio_importer as istio_importer_v2
@@ -75,6 +76,7 @@
pysec_importer_v2.PyPIImporterPipeline,
xen_importer_v2.XenImporterPipeline,
curl_importer_v2.CurlImporterPipeline,
euvd_importer_v2.EUVDImporterPipeline,
oss_fuzz_v2.OSSFuzzImporterPipeline,
istio_importer_v2.IstioImporterPipeline,
postgresql_importer_v2.PostgreSQLImporterPipeline,
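For context, a hedged sketch of how this registration is typically exercised, assuming the surrounding module collects these classes into an IMPORTERS_REGISTRY mapping keyed by pipeline_id (as the other v2 importers appear to be) and that pipelines expose the aboutcode-style execute() entry point; both names are assumptions, not confirmed by this diff:

    from vulnerabilities.importers import IMPORTERS_REGISTRY  # assumed registry name

    # Look up the newly registered pipeline by its pipeline_id and run it.
    pipeline_class = IMPORTERS_REGISTRY["euvd_importer_v2"]
    pipeline_class().execute()  # execute() assumed from the base pipeline API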
244 changes: 244 additions & 0 deletions vulnerabilities/pipelines/v2_importers/euvd_importer.py
@@ -0,0 +1,244 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# VulnerableCode is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

import json
import logging
import math
import time
from datetime import datetime
from http import HTTPStatus
from typing import Iterable

import requests
from dateutil import parser as dateparser

from vulnerabilities.importer import AdvisoryData
from vulnerabilities.importer import ReferenceV2
from vulnerabilities.importer import VulnerabilitySeverity
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2
from vulnerabilities.severity_systems import SCORING_SYSTEMS

logger = logging.getLogger(__name__)


class EUVDImporterPipeline(VulnerableCodeBaseImporterPipelineV2):
"""
EUVD (EU Vulnerability Database) Importer Pipeline

This pipeline imports security advisories from the European Union Vulnerability Database (EUVD).
"""

pipeline_id = "euvd_importer_v2"
spdx_license_expression = "CC-BY-4.0"
license_url = "https://www.enisa.europa.eu/about-enisa/legal-notice/"
url = "https://euvdservices.enisa.europa.eu/api/search"

def __init__(self):
super().__init__()
self._cached_data = None

@classmethod
def steps(cls):
return (cls.collect_and_store_advisories,)

def fetch_data(self):
if self._cached_data is not None:
logger.info(f"Using cached data: {len(self._cached_data)} items")
return self._cached_data
Comment on lines +51 to +53

Collaborator: Why do we have _cached_data? Is it because the API returns repeated data?

Author: _cached_data prevents a second full API fetch. The base importer calls fetch_data() once to count advisories and again to iterate through them. Caching ensures both steps use the same dataset snapshot while avoiding duplicated network requests and API load.
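A minimal sketch of that calling pattern (the driver loop is assumed; the method names are from this module):

    pipeline = EUVDImporterPipeline()
    # First call hits the API and populates _cached_data.
    count = pipeline.advisories_count()
    # Second call is served from _cached_data; no extra network requests.
    advisories = list(pipeline.collect_advisories())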


all_items = []
size = 100
max_retries = 2

logger.info(f"Fetching data from EUVD API: {self.url}")

total_count = self._fetch_total_count(size, max_retries)
if total_count is None:
logger.error("Failed to fetch total count from API")
return all_items

total_pages = math.ceil(total_count / size)
logger.info(f"Total advisories: {total_count}, Total pages: {total_pages}")

first_page_data = self._fetch_page(0, size, max_retries)
if first_page_data:
all_items.extend(first_page_data)
logger.info(f"Fetched page 0: {len(first_page_data)} items (total: {len(all_items)})")

for page in range(1, total_pages):
page_data = self._fetch_page(page, size, max_retries)
if page_data is None:
logger.warning(f"Skipping page {page} after failed retries")
continue

if not page_data:
logger.info(f"No items in response for page {page}; stopping fetch.")
break

all_items.extend(page_data)
logger.info(f"Fetched page {page}: {len(page_data)} items (total: {len(all_items)})")

logger.info(f"Fetch completed successfully. Total items collected: {len(all_items)}")

self._cached_data = all_items
logger.info(f"Cached {len(all_items)} items for reuse")

return all_items

def _make_request_with_retry(self, params, max_retries, context):
headers = {"User-Agent": "VulnerableCode"}

for attempt in range(max_retries):
try:
response = requests.get(self.url, headers=headers, params=params, timeout=30)

if response.status_code != HTTPStatus.OK:
logger.error(f"API returned status {response.status_code} for {context}")
if attempt < max_retries - 1:
logger.info(f"Retrying {context} (attempt {attempt + 1}/{max_retries})")
time.sleep(3)
continue
return None

return response.json()

except requests.exceptions.Timeout:
logger.warning(f"Timeout on {context} (attempt {attempt + 1}/{max_retries})")
if attempt < max_retries - 1:
time.sleep(3)
continue
return None

except requests.exceptions.RequestException as e:
logger.error(
f"Network error on {context}: {e} (attempt {attempt + 1}/{max_retries})"
)
if attempt < max_retries - 1:
time.sleep(3)
continue
return None

except (ValueError, KeyError) as e:
logger.error(f"Error parsing response for {context}: {e}")
return None

return None

def _fetch_total_count(self, size, max_retries):
"""Fetch the total count of advisories from the API."""
params = {"size": size, "page": 0}
data = self._make_request_with_retry(params, max_retries, "total count")

if data is None:
return None

total = data.get("total")
if total is None:
logger.error("No 'total' field in API response")

return total

def _fetch_page(self, page, size, max_retries):
"""Fetch a single page of advisories from the API."""
params = {"size": size, "page": page}
data = self._make_request_with_retry(params, max_retries, f"page {page}")

if data is None:
return None

return data.get("items", [])

def advisories_count(self) -> int:
return len(self.fetch_data())

def collect_advisories(self) -> Iterable[AdvisoryData]:
for raw_data in self.fetch_data():
try:
advisory = self.parse_advisory(raw_data)
if advisory:
yield advisory
except (ValueError, KeyError, TypeError) as e:
logger.error(f"Failed to parse advisory: {e}")
logger.debug(f"Raw data: {raw_data}")
continue

def parse_advisory(self, raw_data: dict) -> AdvisoryData:
advisory_id = raw_data.get("id", "")

aliases = [advisory_id] if advisory_id else []
aliases_str = raw_data.get("aliases", "")
if aliases_str:
cve_aliases = [alias.strip() for alias in aliases_str.split("\n") if alias.strip()]
aliases.extend(cve_aliases)

summary = raw_data.get("description", "")

date_published = None
date_str = raw_data.get("datePublished", "")
if date_str:
try:
date_published = dateparser.parse(date_str)
if date_published and date_published.tzinfo is None:
date_published = date_published.replace(
tzinfo=datetime.now().astimezone().tzinfo
)
except (ValueError, TypeError) as e:
logger.warning(f"Failed to parse date '{date_str}': {e}")

references = []
references_str = raw_data.get("references", "")
if references_str:
urls = [url.strip() for url in references_str.split("\n") if url.strip()]
for url in urls:
references.append(ReferenceV2(url=url))

if advisory_id:
advisory_url = f"https://euvd.enisa.europa.eu/vulnerability/{advisory_id}"
references.append(ReferenceV2(url=advisory_url))

severities = []
base_score = raw_data.get("baseScore")
base_score_version = raw_data.get("baseScoreVersion")
base_score_vector = raw_data.get("baseScoreVector")

if base_score and base_score_version:
scoring_system = self.get_scoring_system(base_score_version)
if scoring_system:
severity = VulnerabilitySeverity(
system=scoring_system,
value=str(base_score),
scoring_elements=base_score_vector or "",
)
severities.append(severity)

return AdvisoryData(
advisory_id=advisory_id,
aliases=aliases,
summary=summary,
references_v2=references,
affected_packages=[],
date_published=date_published,
url=advisory_url if advisory_id else "",
severities=severities,
original_advisory_text=json.dumps(raw_data, indent=2, ensure_ascii=False),
)

@staticmethod
def get_scoring_system(version: str):
version_map = {
"4.0": "cvssv4",
"3.1": "cvssv3.1",
"3.0": "cvssv3",
"2.0": "cvssv2",
}
system_key = version_map.get(version)
if system_key:
return SCORING_SYSTEMS.get(system_key)
logger.warning(f"Unknown CVSS version: {version}")
return None
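For reference, parse_advisory above consumes raw API records shaped roughly like the dict below. The field names come straight from the code; the values are invented for illustration:

    sample_record = {
        "id": "EUVD-2024-0001",  # becomes advisory_id and the first alias
        "aliases": "CVE-2024-0001\nCVE-2024-0002",  # newline-separated aliases
        "description": "Example summary text.",
        "datePublished": "2024-01-15T10:00:00Z",
        "references": "https://example.com/advisory\nhttps://example.com/patch",
        "baseScore": 7.5,
        "baseScoreVersion": "3.1",  # mapped to the cvssv3.1 scoring system
        "baseScoreVector": "CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:N/A:N",
    }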
@@ -0,0 +1,78 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# VulnerableCode is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

import json
from pathlib import Path
from unittest import TestCase
from unittest.mock import Mock
from unittest.mock import patch

from vulnerabilities.pipelines.v2_importers.euvd_importer import EUVDImporterPipeline
from vulnerabilities.tests import util_tests

TEST_DATA = Path(__file__).parent.parent.parent / "test_data" / "euvd"


class TestEUVDImporterPipeline(TestCase):
@patch("vulnerabilities.pipelines.v2_importers.euvd_importer.requests.get")
def test_collect_advisories(self, mock_get):
"""Test collecting and parsing advisories from test data"""
sample1_path = TEST_DATA / "euvd_sample1.json"

sample1 = json.loads(sample1_path.read_text(encoding="utf-8"))

mock_responses = [
Mock(status_code=200, json=lambda: sample1),
Mock(status_code=200, json=lambda: sample1),
]
mock_get.side_effect = mock_responses

pipeline = EUVDImporterPipeline()
advisories = [data.to_dict() for data in list(pipeline.collect_advisories())]

expected_file = TEST_DATA / "euvd-expected.json"
util_tests.check_results_against_json(advisories, expected_file)

def test_get_scoring_system(self):
"""Test CVSS version to scoring system mapping"""
pipeline = EUVDImporterPipeline()

system_v4 = pipeline.get_scoring_system("4.0")
assert system_v4 is not None
assert system_v4.identifier == "cvssv4"

system_v31 = pipeline.get_scoring_system("3.1")
assert system_v31 is not None
assert system_v31.identifier == "cvssv3.1"

system_v3 = pipeline.get_scoring_system("3.0")
assert system_v3 is not None
assert system_v3.identifier == "cvssv3"

system_v2 = pipeline.get_scoring_system("2.0")
assert system_v2 is not None
assert system_v2.identifier == "cvssv2"

system_unknown = pipeline.get_scoring_system("unknown")
assert system_unknown is None

@patch("vulnerabilities.pipelines.v2_importers.euvd_importer.requests.get")
def test_advisories_count(self, mock_get):
"""Test counting advisories"""
sample_data = {"items": [{"id": "1"}, {"id": "2"}, {"id": "3"}], "total": 3}
mock_responses = [
Mock(status_code=200, json=lambda: sample_data),
Mock(status_code=200, json=lambda: sample_data),
]
mock_get.side_effect = mock_responses

pipeline = EUVDImporterPipeline()
count = pipeline.advisories_count()

assert count == 3