From a25614225c422c070a18742cc5712e20f9f9a30f Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Wed, 26 Apr 2023 16:59:49 +0530 Subject: [PATCH] Clean imported data after import process Signed-off-by: Tushar Goel --- vulnerabilities/importer.py | 38 +++-------------------- vulnerabilities/importers/fireeye.py | 9 ++---- vulnerabilities/importers/gitlab.py | 11 +++---- vulnerabilities/importers/istio.py | 27 +++++++++------- vulnerabilities/importers/kaybee.py | 4 +-- vulnerabilities/importers/ruby.py | 4 +-- vulnerabilities/importers/rust.py | 4 +-- vulnerabilities/tests/test_data_source.py | 15 ++++----- 8 files changed, 41 insertions(+), 71 deletions(-) diff --git a/vulnerabilities/importer.py b/vulnerabilities/importer.py index 21f4b5d26..4b6a26c46 100644 --- a/vulnerabilities/importer.py +++ b/vulnerabilities/importer.py @@ -288,6 +288,10 @@ class InvalidSPDXLicense(Exception): pass +class ForkError(Exception): + pass + + class Importer: """ An Importer collects data from various upstreams and returns corresponding AdvisoryData objects @@ -325,6 +329,7 @@ def advisory_data(self) -> Iterable[AdvisoryData]: def clone(self, repo_url): try: + self.repo_url = repo_url self.vcs_response = fetch_via_vcs(repo_url) except Exception as e: msg = f"Failed to fetch {repo_url} via vcs: {e}" @@ -332,39 +337,6 @@ def clone(self, repo_url): raise ForkError(msg) from e -class ForkError(Exception): - pass - - -class GitImporter(Importer): - def __init__(self, repo_url): - super().__init__() - self.repo_url = repo_url - self.vcs_response = None - - def __enter__(self): - super().__enter__() - self.clone() - return self - - def __exit__(self): - self.vcs_response.delete() - - def clone(self): - try: - self.vcs_response = fetch_via_vcs(self.repo_url) - except Exception as e: - msg = f"Failed to fetch {self.repo_url} via vcs: {e}" - logger.error(msg) - raise ForkError(msg) from e - - def advisory_data(self) -> Iterable[AdvisoryData]: - """ - Return AdvisoryData objects corresponding to the data being imported - """ - raise NotImplementedError - - # TODO: Needs rewrite class OvalImporter(Importer): """ diff --git a/vulnerabilities/importers/fireeye.py b/vulnerabilities/importers/fireeye.py index dc9fc129f..0a833ba35 100644 --- a/vulnerabilities/importers/fireeye.py +++ b/vulnerabilities/importers/fireeye.py @@ -13,7 +13,7 @@ from typing import List from vulnerabilities.importer import AdvisoryData -from vulnerabilities.importer import GitImporter +from vulnerabilities.importer import Importer from vulnerabilities.importer import Reference from vulnerabilities.utils import build_description from vulnerabilities.utils import dedupe @@ -21,7 +21,7 @@ logger = logging.getLogger(__name__) -class FireyeImporter(GitImporter): +class FireyeImporter(Importer): spdx_license_expression = "CC-BY-SA-4.0 AND MIT" license_url = "https://github.com/mandiant/Vulnerability-Disclosures/blob/master/README.md" notice = """ @@ -31,11 +31,8 @@ class FireyeImporter(GitImporter): 2. MIT - For source code contained within provided CVE information """ - def __init__(self): - super().__init__(repo_url="git+https://github.com/mandiant/Vulnerability-Disclosures") - def advisory_data(self) -> Iterable[AdvisoryData]: - self.clone() + self.clone(repo_url="git+https://github.com/mandiant/Vulnerability-Disclosures") files = filter( lambda p: p.suffix in [".md", ".MD"], Path(self.vcs_response.dest_dir).glob("**/*") ) diff --git a/vulnerabilities/importers/gitlab.py b/vulnerabilities/importers/gitlab.py index 377088637..e58c4e301 100644 --- a/vulnerabilities/importers/gitlab.py +++ b/vulnerabilities/importers/gitlab.py @@ -25,7 +25,7 @@ from vulnerabilities.importer import AdvisoryData from vulnerabilities.importer import AffectedPackage -from vulnerabilities.importer import GitImporter +from vulnerabilities.importer import Importer from vulnerabilities.importer import Reference from vulnerabilities.utils import build_description @@ -48,16 +48,13 @@ GITLAB_SCHEME_BY_PURL_TYPE = {v: k for k, v in PURL_TYPE_BY_GITLAB_SCHEME.items()} -class GitLabAPIImporter(GitImporter): +class GitLabAPIImporter(Importer): spdx_license_expression = "MIT" license_url = "https://gitlab.com/gitlab-org/advisories-community/-/blob/main/LICENSE" - def __init__(self): - super().__init__(repo_url="git+https://gitlab.com/gitlab-org/advisories-community/") - - def advisory_data(self, _keep_clone=True) -> Iterable[AdvisoryData]: + def advisory_data(self, _keep_clone=False) -> Iterable[AdvisoryData]: try: - self.clone() + self.clone(repo_url="git+https://gitlab.com/gitlab-org/advisories-community/") base_path = Path(self.vcs_response.dest_dir) for file_path in base_path.glob("**/*.yml"): diff --git a/vulnerabilities/importers/istio.py b/vulnerabilities/importers/istio.py index 6030faeac..2c6f6cae8 100644 --- a/vulnerabilities/importers/istio.py +++ b/vulnerabilities/importers/istio.py @@ -50,20 +50,23 @@ class IstioImporter(Importer): spdx_license_expression = "Apache-2.0" license_url = "https://github.com/istio/istio.io/blob/master/LICENSE" - repo_url = "git+https://github.com/istio/istio.io/" def advisory_data(self) -> Set[AdvisoryData]: - self.clone(self.repo_url) - path = Path(self.vcs_response.dest_dir) - vuln = path / "content/en/news/security/" - for file in vuln.glob("**/*.md"): - # Istio website has files with name starting with underscore, these contain metadata - # required for rendering the website. We're not interested in these. - # See also https://github.com/nexB/vulnerablecode/issues/563 - file = str(file) - if file.endswith("_index.md"): - continue - yield from self.process_file(file) + try: + self.clone(repo_url="git+https://github.com/istio/istio.io/") + path = Path(self.vcs_response.dest_dir) + vuln = path / "content/en/news/security/" + for file in vuln.glob("**/*.md"): + # Istio website has files with name starting with underscore, these contain metadata + # required for rendering the website. We're not interested in these. + # See also https://github.com/nexB/vulnerablecode/issues/563 + file = str(file) + if file.endswith("_index.md"): + continue + yield from self.process_file(file) + finally: + if self.vcs_response: + self.vcs_response.delete() def process_file(self, path): diff --git a/vulnerabilities/importers/kaybee.py b/vulnerabilities/importers/kaybee.py index 7464a999f..1b908e4b5 100644 --- a/vulnerabilities/importers/kaybee.py +++ b/vulnerabilities/importers/kaybee.py @@ -10,13 +10,13 @@ from packageurl import PackageURL from vulnerabilities.importer import AdvisoryData -from vulnerabilities.importer import GitImporter +from vulnerabilities.importer import Importer from vulnerabilities.importer import Reference from vulnerabilities.utils import load_yaml from vulnerabilities.utils import nearest_patched_package -class KaybeeImporter(GitImporter): +class KaybeeImporter(Importer): def __enter__(self): super(KaybeeImporter, self).__enter__() self._added_files, self._updated_files = self.file_changes( diff --git a/vulnerabilities/importers/ruby.py b/vulnerabilities/importers/ruby.py index 1e116e3ff..556e39140 100644 --- a/vulnerabilities/importers/ruby.py +++ b/vulnerabilities/importers/ruby.py @@ -18,14 +18,14 @@ from univers.versions import SemverVersion from vulnerabilities.importer import AdvisoryData -from vulnerabilities.importer import GitImporter +from vulnerabilities.importer import Importer from vulnerabilities.importer import Reference from vulnerabilities.package_managers import RubyVersionAPI from vulnerabilities.utils import load_yaml from vulnerabilities.utils import nearest_patched_package -class RubyImporter(GitImporter): +class RubyImporter(Importer): def __enter__(self): super(RubyImporter, self).__enter__() diff --git a/vulnerabilities/importers/rust.py b/vulnerabilities/importers/rust.py index 701405128..a1e97c277 100644 --- a/vulnerabilities/importers/rust.py +++ b/vulnerabilities/importers/rust.py @@ -22,13 +22,13 @@ from univers.versions import SemverVersion from vulnerabilities.importer import AdvisoryData -from vulnerabilities.importer import GitImporter +from vulnerabilities.importer import Importer from vulnerabilities.importer import Reference from vulnerabilities.package_managers import CratesVersionAPI from vulnerabilities.utils import nearest_patched_package -class RustImporter(GitImporter): +class RustImporter(Importer): def __enter__(self): super(RustImporter, self).__enter__() diff --git a/vulnerabilities/tests/test_data_source.py b/vulnerabilities/tests/test_data_source.py index b2f173029..3befa58d5 100644 --- a/vulnerabilities/tests/test_data_source.py +++ b/vulnerabilities/tests/test_data_source.py @@ -13,7 +13,7 @@ from packageurl import PackageURL -from vulnerabilities.importer import GitImporter +from vulnerabilities.importer import Importer from vulnerabilities.importer import OvalImporter from vulnerabilities.oval_parser import OvalParser @@ -35,7 +35,7 @@ class MockOvalImporter(OvalImporter): spdx_license_expression = "FOO-BAR" -class MockGitImporter(GitImporter): +class MockGitImporter(Importer): spdx_license_expression = "FOO-BAR" @@ -83,12 +83,13 @@ def test__collect_pkgs(): assert found_ubuntu_pkgs == expected_ubuntu_pkgs -def clone(self): +def mock_function(repo_url): pass -@patch("vulnerabilities.importer.GitImporter.clone") +@patch("vulnerabilities.importer.fetch_via_vcs") def test_git_importer(mock_clone): - mock_clone.return_value = clone - imp = MockGitImporter("test-url") - assert imp.repo_url == "test-url" + mock_clone.return_value = mock_function + git_importer = MockGitImporter() + git_importer.clone("test-url") + assert git_importer.repo_url == "test-url"