diff --git a/.gitignore b/.gitignore index 4c8414d28..63fb1b5b0 100644 --- a/.gitignore +++ b/.gitignore @@ -46,8 +46,14 @@ coverage.xml *.log local_settings.py -# Sphinx documentation -docs/_build/ +# Sphinx +docs/_build +docs/bin +docs/build +docs/include +docs/Lib +doc/pyvenv.cfg +pyvenv.cfg # PyBuilder target/ @@ -103,3 +109,13 @@ Pipfile *.bak /.cache/ /tmp/ + +# pyenv +/.python-version +/man/ +/.pytest_cache/ +lib64 +tcl + +# Ignore Jupyter Notebook related temp files +.ipynb_checkpoints/ diff --git a/.readthedocs.yml b/.readthedocs.yml index d5dde4cfd..683f3a82a 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -5,12 +5,25 @@ # Required version: 2 +# Build in latest ubuntu/python +build: + os: ubuntu-22.04 + tools: + python: "3.11" + +# Build PDF & ePub +formats: + - epub + - pdf + # Where the Sphinx conf.py file is located sphinx: configuration: docs/source/conf.py -# Setting the doc build requirements +# Setting the python version and doc build requirements python: - version: "3.7" install: - - requirements: docs/requirements.txt + - method: pip + path: . + extra_requirements: + - dev diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 96afb1188..275c0b804 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -2,8 +2,108 @@ Release notes ============= +Version v33.6.3 +---------------- + +- We updated RTD build configuration. +- We added importer for OSS-Fuzz. +- We removed vulnerabilities with empty aliases. +- We fixed search encoding issue https://github.com/nexB/vulnerablecode/issues/1336. +- We added middleware to ban "bytedance" user-agent. + + +Version v33.6.2 +---------------- + +- We added note about CSRF_TRUSTED_ORIGINS. +- We added proper acknowledgements for NGI projects. +- We added throttling for anonymous users. + +Version v33.6.1 +---------------- + +- We added pagination to valid versions improver. + + +Version v33.6.0 +---------------- + +- We added support to write packages and vulnerabilities at the time of import. 
+ + +Version v33.5.0 +---------------- + +- We fixed a text-overflow issue in the Essentials tab of the Vulnerability details template. +- We added clickable links to the Essentials tab of the Vulnerability details template that enable + the user to navigate to the Fixed by packages tab and the Affected packages tab. +- We fixed severity range issue for handling unknown scores. + +Version v33.4.0 +---------------- + +- We added importer specific improvers and removed default improver + additionally improve recent advisories first. + + +Version v33.3.0 +---------------- + +- We filtered out the weakness that are not presented in the + cwe2.database before passing them into the vulnerability details view. + + +Version v33.2.0 +----------------- + +- We fixed NVD importer to import the latest data by adding weakness + in unique content ID for advisories. + + +Version v33.1.0 +----------------- + +- We have paginated the default improver and added keyboard interrupt support for import and improve processes. +- We bumped PyYaml to 6.0.1 and saneyaml to 0.6.0 and dropped docker-compose. + + +Version v33.0.0 +----------------- + +- We have dropped ``unresolved_vulnerabilities`` from /api/package endpoint API response. +- We have added missing quotes for href values in template. +- We have fixed merge functionality of AffectedPackage. + + +Version v32.0.1 +----------------- + +- Clean imported data after import process. + + +Version v32.0.0 +----------------- + +- We fixed Apache HTTPD and Apache Kafka importer. +- We removed excessive network calls from Redhat importer. +- Add documentation for version 32.0.0. + + +Version v32.0.0rc4 +------------------- + +- We added loading of env for GitHub datasource in vulntotal. +- We fixed import process in github importer in vulnerablecode reported here + https://github.com/nexB/vulnerablecode/issues/1142. +- We added an improver to get all package versions + of all ecosystems for a range of affected packages. 
+- We added documentation for configuring throttling rate for API endpoints. +- We fixed kbmsr2019 importer. +- We added support for conan advisories through gitlab importer. + + Version v32.0.0rc3 ------------- +------------------- - Add aliases to package endpoint. - We added Apache HTTPD improver. diff --git a/README.rst b/README.rst index f41e968ee..a5a256b13 100644 --- a/README.rst +++ b/README.rst @@ -105,6 +105,7 @@ On a Debian system, use this:: git clone https://github.com/nexB/vulnerablecode.git && cd vulnerablecode make dev envfile postgres make test + source venv/bin/activate ./manage.py import vulnerabilities.importers.nginx.NginxImporter ./manage.py improve --all make run @@ -145,3 +146,20 @@ See https://creativecommons.org/licenses/by-sa/4.0/legalcode for the license tex See https://github.com/nexB/vulnerablecode for support or download. See https://aboutcode.org for more information about nexB OSS projects. + +Acknowledgements +^^^^^^^^^^^^^^^^ + +This project was funded through the NGI0 PET Fund, a fund established by +NLnet with financial support from the European Commission's Next Generation +Internet programme, under the aegis of DG Communications Networks, Content +and Technology under grant agreement No 825310. + +https://nlnet.nl/project/VulnerableCode/ + +This project was funded through the NGI0 Discovery Fund, a fund established +by NLnet with financial support from the European Commission's Next Generation +Internet programme, under the aegis of DG Communications Networks, Content +and Technology under grant agreement No 825322. 
+ +https://nlnet.nl/project/vulnerabilitydatabase/ diff --git a/SOURCES.rst b/SOURCES.rst index 8cf65d02d..bc0963a10 100644 --- a/SOURCES.rst +++ b/SOURCES.rst @@ -13,7 +13,7 @@ +----------------+------------------------------------------------------------------------------------------------------+----------------------------------------------------+ |ruby | https://github.com/rubysec/ruby-advisory-db.git |ruby gems | +----------------+------------------------------------------------------------------------------------------------------+----------------------------------------------------+ -|ubuntu | https://people.canonical.com/~ubuntu-security/oval/ |ubuntu packages | +|ubuntu | |ubuntu packages | +----------------+------------------------------------------------------------------------------------------------------+----------------------------------------------------+ |retiredotnet | https://github.com/RetireNet/Packages.git |.NET packages | +----------------+------------------------------------------------------------------------------------------------------+----------------------------------------------------+ diff --git a/apache-2.0.LICENSE b/apache-2.0.LICENSE index d9a10c0d8..261eeb9e9 100644 --- a/apache-2.0.LICENSE +++ b/apache-2.0.LICENSE @@ -174,3 +174,28 @@ of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/docs/requirements.txt b/docs/requirements.txt index e1b67b3c3..682892887 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,3 +1,3 @@ Sphinx>=3.3.1 sphinx-rtd-theme>=0.5.0 -doc8>=0.8.1 \ No newline at end of file +doc8>=0.8.1 diff --git a/docs/source/_static/theme_overrides.css b/docs/source/_static/theme_overrides.css new file mode 100644 index 000000000..ce4aad98f --- /dev/null +++ b/docs/source/_static/theme_overrides.css @@ -0,0 +1,31 @@ +/* this is the container for the pages */ +.wy-nav-content { + max-width: 100%; + padding: 0px 40px 0px 0px; + margin-top: 0px; + background-color: #fcfcfc; +} + +.wy-nav-content-wrap { + background-color: #fcfcfc; + border-right: solid 1px #e8e8e8; +} + +div.rst-content { + max-width: 1300px; + background-color: #fcfcfc; + border: 0; + padding: 0px 80px 10px 80px; + margin-left: 50px; +} + + +@media (max-width: 768px) { + div.rst-content { + max-width: 1300px; + background-color: #fcfcfc; + border: 0; + padding: 0px 10px 10px 10px; + margin-left: 0px; + } +} diff --git a/docs/source/api-admin.rst b/docs/source/api-admin.rst new file mode 100644 index 000000000..53cf25354 --- /dev/null +++ b/docs/source/api-admin.rst @@ -0,0 +1,21 @@ +.. 
_api_admin: + +API usage administration for on-premise deployments +==================================================== + +Enable the API key authentication +------------------------------------ + +There is a setting VULNERABLECODEIO_REQUIRE_AUTHENTICATION for this. Use it this +way:: + + $ VULNERABLECODEIO_REQUIRE_AUTHENTICATION=1 make run + + +Create an API key-only user +------------------------------------ + +This can be done in the admin and from the command line:: + + $ ./manage.py create_api_user --email "p4@nexb.com" --first-name="Phil" --last-name "Goel" + User p4@nexb.com created with API key: ce8616b929d2adsddd6146346c2f26536423423491 diff --git a/docs/source/api.rst b/docs/source/api.rst index c779784a5..7f34c5b0b 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -11,33 +11,136 @@ Browse the Open API documentation - https://public.vulnerablecode.io/api/schema/ for the OpenAPI schema -Enable the API key authentication +How to use OpenAPI documentation +-------------------------------------- + +The API documentation is available at https://public.vulnerablecode.io/api/docs/. +To use the endpoints you need to authenticate with an API key. Request your API key +from https://public.vulnerablecode.io/account/request_api_key/. Once you have +your API key, click on the ``Authorize`` button on the top right of the page and enter +your API key in the ``value`` field with ``Token`` prefix, so if your token is "1234567890abcdef" +then you have to enter this: ``Token 1234567890abcdef``. + +.. _Package Vulnerabilities Query: + +Query for Package Vulnerabilities ------------------------------------ -There is a setting VULNERABLECODEIO_REQUIRE_AUTHENTICATION for this. Use it this -way:: +The package endpoint allows you to query vulnerabilities by package using a +purl or purl fields. 
- $ VULNERABLECODEIO_REQUIRE_AUTHENTICATION=1 make run +Sample python script:: + import requests -Create an API key-only user ------------------------------------- + # Query by purl + resp = requests.get( + "https://public.vulnerablecode.io/api/packages?purl=pkg:maven/log4j/log4j@1.2.27", + headers={"Authorization": "Token 123456789"}, + ).json() + + # Query by purl type, get all the vulnerable maven packages + resp = requests.get( + "https://public.vulnerablecode.io/api/packages?type=maven", + headers={"Authorization": "Token 123456789"}, + ).json() + +Sample using curl:: + + curl -X GET -H 'Authorization: Token ' https://public.vulnerablecode.io/api/packages?purl=pkg:maven/log4j/log4j@1.2.27 + + +The response will be a list of packages, these are packages +that are affected by and/or that fix a vulnerability. + + +.. _Package Bulk Search: + +Package Bulk Search +--------------------- + + +The package bulk search endpoint allows you to search for purls in bulk. You can +pass a list of purls in the request body and the endpoint will return a list of +purls with vulnerabilities. + + +You can pass a list of ``purls`` in the request body. Each package should be a +valid purl string. + +You can also pass options like ``purl_only`` and ``plain_purl`` in the request. +``purl_only`` will return only a list of vulnerable purls from the purls received in request. +``plain_purl`` allows you to query the API using plain purls by removing qualifiers +and subpath from the purl. 
-This can be done in the admin and from the command line:: +The request body should be a JSON object with the following structure:: - $ ./manage.py create_api_user --email "p4@nexb.com" --first-name="Phil" --last-name "Goel" - User p4@nexb.com created with API key: ce8616b929d2adsddd6146346c2f26536423423491 + { + "purls": [ + "pkg:pypi/flask@1.2.0", + "pkg:npm/express@1.0" + ], + "purl_only": false, + "plain_purl": false, + } +Sample python script:: -Access the API using curl ------------------------------ + import requests - curl -X GET -H 'Authorization: Token ' https://public.vulnerablecode.io/api/ + request_body = { + "purls": [ + "pkg:npm/grunt-radical@0.0.14" + ], + } + resp = requests.post('https://public.vulnerablecode.io/api/packages/bulk_search', json= request_body, headers={'Authorization': "Token 123456789"}).json() -API endpoints ---------------- +The response will be a list of packages, these are packages +that are affected by and/or that fix a vulnerability. + +.. _CPE Bulk Search: + +CPE Bulk Search +--------------------- + + +The CPE bulk search endpoint allows you to search for packages in bulk. +You can pass a list of packages in the request body and the endpoint will +return a list of vulnerabilities. + + +You can pass a list of ``cpes`` in the request body. Each cpe should be a +non empty string and a valid CPE. + + +The request body should be a JSON object with the following structure:: + + { + "cpes": [ + "cpe:2.3:a:apache:struts:2.3.1:*:*:*:*:*:*:*", + "cpe:2.3:a:apache:struts:2.3.2:*:*:*:*:*:*:*" + ] + } + +Sample python script:: + + import requests + + request_body = { + "cpes": [ + "cpe:2.3:a:apache:struts:2.3.1:*:*:*:*:*:*:*" + ], + } + + resp = requests.post('https://public.vulnerablecode.io/api/cpes/bulk_search', json= request_body, headers={'Authorization': "Token 123456789"}).json() + +The response will be a list of vulnerabilities that have the following CPEs. 
+ + +API endpoints reference +-------------------------- There are two primary endpoints: @@ -48,3 +151,83 @@ There are two primary endpoints: And two secondary endpoints, used to query vulnerability aliases (such as CVEs) and vulnerability by CPEs: cpes/ and aliases/ + +.. list-table:: Table for the main API endpoints + :widths: 30 40 30 + :header-rows: 1 + + * - Endpoint + - Query Parameters + - Expected Output + * - ``/api/packages`` + - + - ``purl`` (string) = package-url of the package + - ``type`` (string) = type of the package + - ``namespace`` (string) = namespace of the package + - ``name`` (string) = name of the package + - ``version`` (string) = version of the package + - ``qualifiers`` (string) = qualifiers of the package + - ``subpath`` (string) = subpath of the package + - ``page`` (integer) = page number of the response + - ``page_size`` (integer) = number of packages in each page + - Return a list of packages using a package-url (purl) or a combination of + type, namespace, name, version, qualifiers, subpath purl fields. See the + `purl specification `_ for more details. See example at :ref:`Package Vulnerabilities Query` section for more details. 
+ * - ``/api/packages/bulk_search`` + - Refer to package bulk search section :ref:`Package Bulk Search` + - Return a list of packages + * - ``/api/vulnerabilities/`` + - + - ``vulnerability_id`` (string) = VCID (VulnerableCode Identifier) of the vulnerability + - ``page`` (integer) = page number of the response + - ``page_size`` (integer) = number of vulnerabilities in each page + - Return a list of vulnerabilities + * - ``/api/cpes`` + - + - ``cpe`` (string) = value of the cpe + - ``page`` (integer) = page number of the response + - ``page_size`` (integer) = number of cpes in each page + - Return a list of vulnerabilities + * - ``/api/cpes/bulk_search`` + - Refer to CPE bulk search section :ref:`CPE Bulk Search` + - Return a list of cpes + * - ``/api/aliases`` + - + - ``alias`` (string) = value of the alias + - ``page`` (integer) = page number of the response + - ``page_size`` (integer) = number of aliases in each page + - Return a list of vulnerabilities + +.. list-table:: Table for other API endpoints + :widths: 30 40 30 + :header-rows: 1 + + * - Endpoint + - Query Parameters + - Expected Output + * - ``/api/packages/{id}`` + - + - ``id`` (integer) = internal primary id of the package + - Return a package with the given id + * - ``/api/packages/all`` + - No parameter required + - Return a list of all vulnerable packages + * - ``/api/vulnerabilities/{id}`` + - + - ``id`` (integer) = internal primary id of the vulnerability + - Return a vulnerability with the given id + * - ``/api/aliases/{id}`` + - + - ``id`` (integer) = internal primary id of the alias + - Return an alias with the given id + * - ``/api/cpes/{id}`` + - + - ``id`` = internal primary id of the cpe + - Return a cpe with the given id + +Miscellaneous +---------------- + +The API is paginated and the default page size is 100. You can change the page size +by passing the ``page_size`` parameter. You can also change the page number by passing +the ``page`` parameter. 
diff --git a/docs/source/conf.py b/docs/source/conf.py index 94f2c1d60..7141ba67d 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -60,6 +60,11 @@ # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = [] +# html_static_path = [] +html_static_path = ["_static"] + +html_css_files = [ + "theme_overrides.css", +] master_doc = "index" diff --git a/docs/source/contributing.rst b/docs/source/contributing.rst index 18f021b4d..fa6e7075b 100644 --- a/docs/source/contributing.rst +++ b/docs/source/contributing.rst @@ -89,3 +89,577 @@ Helpful Resources on how to write good commit messages - `Pro Git book `_ - `How to write a good bug report `_ + +.. _tutorial_add_a_new_importer: + +Add a new importer +------------------- + +This tutorial contains all the things one should know to quickly implement an importer. +Many internal details about importers can be found inside the +:file:`vulnerabilites/importer.py` file. +Make sure to go through :ref:`importer-overview` before you begin writing one. + +TL;DR +------- + +#. Create a new :file:`vulnerabilities/importers/{importer_name.py}` file. +#. Create a new importer subclass inheriting from the ``Importer`` superclass defined in + ``vulnerabilites.importer``. It is conventional to end an importer name with *Importer*. +#. Specify the importer license. +#. Implement the ``advisory_data`` method to process the data source you are + writing an importer for. +#. Add the newly created importer to the importers registry at + ``vulnerabilites/importers/__init__.py`` + +.. _tutorial_add_a_new_importer_prerequisites: + +Prerequisites +-------------- + +Before writing an importer, it is important to familiarize yourself with the following concepts. 
+ +PackageURL +^^^^^^^^^^^^ + +VulnerableCode extensively uses Package URLs to identify a package. See the +`PackageURL specification `_ and its `Python implementation +`_ for more details. + +**Example usage:** + +.. code:: python + + from packageurl import PackageURL + purl = PackageURL(name="ffmpeg", type="deb", version="1.2.3") + + +AdvisoryData +^^^^^^^^^^^^^ + +``AdvisoryData`` is an intermediate data format: +it is expected that your importer will convert the raw scraped data into ``AdvisoryData`` objects. +All the fields in ``AdvisoryData`` dataclass are optional; it is the importer's resposibility to +ensure that it contains meaningful information about a vulnerability. + +AffectedPackage +^^^^^^^^^^^^^^^^ + +``AffectedPackage`` data type is used to store a range of affected versions and a fixed version of a +given package. For all version-related data, `univers `_ library +is used. + +Univers +^^^^^^^^ + +`univers `_ is a Python implementation of the `vers specification `_. +It can parse and compare all the package versions and all the ranges, +from debian, npm, pypi, ruby and more. +It processes all the version range specs and expressions. + +Importer +^^^^^^^^^ + +All the generic importers need to implement the ``Importer`` class. +For ``Git`` or ``Oval`` data source, ``GitImporter`` or ``OvalImporter`` could be implemented. + +.. note:: + + ``GitImporter`` and ``OvalImporter`` need a complete rewrite. + Interested in :ref:`contributing` ? + +Writing an importer +--------------------- + +Create Importer Source File +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +All importers are located in the :file:`vulnerabilites/importers` directory. +Create a new file to put your importer code in. +Generic importers are implemented by writing a subclass for the ``Importer`` superclass and +implementing the unimplemented methods. + +Specify the Importer License +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Importers scrape data off the internet. 
In order to make sure the data is useable, a license +must be provided. +Populate the ``spdx_license_expression`` with the appropriate value. +The SPDX license identifiers can be found at https://spdx.org/licenses/. + +.. note:: + An SPDX license identifier by itself is a valid licence expression. In case you need more complex + expressions, see https://spdx.github.io/spdx-spec/v2.3/SPDX-license-expressions/ + +Implement the ``advisory_data`` Method +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``advisory_data`` method scrapes the advisories from the data source this importer is +targeted at. +It is required to return an *Iterable of AdvisoryData objects*, and thus it is a good idea to yield +from this method after creating each AdvisoryData object. + +At this point, an example importer will look like this: + +:file:`vulnerabilites/importers/example.py` + +.. code-block:: python + + from typing import Iterable + + from packageurl import PackageURL + + from vulnerabilities.importer import AdvisoryData + from vulnerabilities.importer import Importer + + + class ExampleImporter(Importer): + + spdx_license_expression = "BSD-2-Clause" + + def advisory_data(self) -> Iterable[AdvisoryData]: + return [] + +This importer is only a valid skeleton and does not import anything at all. + +Let us implement another dummy importer that actually imports some data. + +Here we have a ``dummy_package`` which follows ``NginxVersionRange`` and ``SemverVersion`` for +version management from `univers `_. + +.. note:: + + It is possible that the versioning scheme you are targeting has not yet been + implemented in the `univers `_ library. + If this is the case, you will need to head over there and implement one. + +.. 
code-block:: python + + from datetime import datetime + from datetime import timezone + from typing import Iterable + + import requests + from packageurl import PackageURL + from univers.version_range import NginxVersionRange + from univers.versions import SemverVersion + + from vulnerabilities.importer import AdvisoryData + from vulnerabilities.importer import AffectedPackage + from vulnerabilities.importer import Importer + from vulnerabilities.importer import Reference + from vulnerabilities.importer import VulnerabilitySeverity + from vulnerabilities.severity_systems import SCORING_SYSTEMS + + + class ExampleImporter(Importer): + + spdx_license_expression = "BSD-2-Clause" + + def advisory_data(self) -> Iterable[AdvisoryData]: + raw_data = fetch_advisory_data() + for data in raw_data: + yield parse_advisory_data(data) + + + def fetch_advisory_data(): + return [ + { + "id": "CVE-2021-23017", + "summary": "1-byte memory overwrite in resolver", + "advisory_severity": "medium", + "vulnerable": "0.6.18-1.20.0", + "fixed": "1.20.1", + "reference": "http://mailman.nginx.org/pipermail/nginx-announce/2021/000300.html", + "published_on": "14-02-2021 UTC", + }, + { + "id": "CVE-2021-1234", + "summary": "Dummy advisory", + "advisory_severity": "high", + "vulnerable": "0.6.18-1.20.0", + "fixed": "1.20.1", + "reference": "http://example.com/cve-2021-1234", + "published_on": "06-10-2021 UTC", + }, + ] + + + def parse_advisory_data(raw_data) -> AdvisoryData: + purl = PackageURL(type="example", name="dummy_package") + affected_version_range = NginxVersionRange.from_native(raw_data["vulnerable"]) + fixed_version = SemverVersion(raw_data["fixed"]) + affected_package = AffectedPackage( + package=purl, affected_version_range=affected_version_range, fixed_version=fixed_version + ) + severity = VulnerabilitySeverity( + system=SCORING_SYSTEMS["generic_textual"], value=raw_data["advisory_severity"] + ) + references = [Reference(url=raw_data["reference"], severities=[severity])] + 
date_published = datetime.strptime(raw_data["published_on"], "%d-%m-%Y %Z").replace( + tzinfo=timezone.utc + ) + + return AdvisoryData( + aliases=[raw_data["id"]], + summary=raw_data["summary"], + affected_packages=[affected_package], + references=references, + date_published=date_published, + ) + + +.. note:: + + | Use ``make valid`` to format your new code using black and isort automatically. + | Use ``make check`` to check for formatting errors. + +Register the Importer +^^^^^^^^^^^^^^^^^^^^^^ + +Finally, register your importer in the importer registry at +:file:`vulnerabilites/importers/__init__.py` + +.. code-block:: python + :emphasize-lines: 1, 4 + + from vulnerabilities.importers import example + from vulnerabilities.importers import nginx + + IMPORTERS_REGISTRY = [nginx.NginxImporter, example.ExampleImporter] + + IMPORTERS_REGISTRY = {x.qualified_name: x for x in IMPORTERS_REGISTRY} + +Congratulations! You have written your first importer. + +Run Your First Importer +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If everything went well, you will see your importer in the list of available importers. + +.. code-block:: console + :emphasize-lines: 5 + + $ ./manage.py import --list + + Vulnerability data can be imported from the following importers: + vulnerabilities.importers.nginx.NginxImporter + vulnerabilities.importers.example.ExampleImporter + +Now, run the importer. + +.. code-block:: console + + $ ./manage.py import vulnerabilities.importers.example.ExampleImporter + + Importing data using vulnerabilities.importers.example.ExampleImporter + Successfully imported data using vulnerabilities.importers.example.ExampleImporter + +See :ref:`command_line_interface` for command line usage instructions. + +Enable Debug Logging (Optional) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +For more visibility, turn on debug logs in :file:`vulnerablecode/settings.py`. + +.. 
code-block:: python + + DEBUG = True + LOGGING = { + 'version': 1, + 'disable_existing_loggers': False, + 'handlers': { + 'console': { + 'class': 'logging.StreamHandler', + }, + }, + 'root': { + 'handlers': ['console'], + 'level': 'DEBUG', + }, + } + +Invoke the import command now and you will see (in a fresh database): + +.. code-block:: console + + $ ./manage.py import vulnerabilities.importers.example.ExampleImporter + + Importing data using vulnerabilities.importers.example.ExampleImporter + Starting import for vulnerabilities.importers.example.ExampleImporter + [*] New Advisory with aliases: ['CVE-2021-23017'], created_by: vulnerabilities.importers.example.ExampleImporter + [*] New Advisory with aliases: ['CVE-2021-1234'], created_by: vulnerabilities.importers.example.ExampleImporter + Finished import for vulnerabilities.importers.example.ExampleImporter. Imported 2 advisories. + Successfully imported data using vulnerabilities.importers.example.ExampleImporter + +.. _tutorial_add_a_new_improver: + +Add a new improver +--------------------- + +This tutorial contains all the things one should know to quickly +implement an improver. +Many internal details about improvers can be found inside the +:file:`vulnerabilites/improver.py` file. +Make sure to go through :ref:`improver-overview` before you begin writing one. + +TL;DR +------- + +#. Locate the importer that this improver will be improving data of at + :file:`vulnerabilities/importers/{importer_name.py}` file. +#. Create a new improver subclass inheriting from the ``Improver`` superclass defined in + ``vulnerabilites.improver``. It is conventional to end an improver name with *Improver*. +#. Implement the ``interesting_advisories`` property to return a QuerySet of imported data + (``Advisory``) you are interested in. +#. Implement the ``get_inferences`` method to return an iterable of ``Inference`` objects for the + given ``AdvisoryData``. +#. 
Add the newly created improver to the improvers registry at + ``vulnerabilites/improvers/__init__.py``. + +Prerequisites +-------------- + +Before writing an improver, it is important to familiarize yourself with the following concepts. + +Importer +^^^^^^^^^^ + +Importers are responsible for scraping vulnerability data from various data sources without creating +a complete relational model between vulnerabilites and their fixes and storing them in a structured +fashion. These data are stored in the ``Advisory`` model and can be converted to an equivalent +``AdvisoryData`` for various use cases. +See :ref:`importer-overview` for a brief overview on importers. + +Importer Prerequisites +^^^^^^^^^^^^^^^^^^^^^^^ + +Improvers consume data produced by importers, and thus it is important to familiarize yourself with +:ref:`Importer Prerequisites `. + +Inference +^^^^^^^^^^^ + +Inferences express the contract between the improvers and the improve runner framework. +An inference is intended to contain data points about a vulnerability without any uncertainties, +which means that one inference will target one vulnerability with the specific relevant affected and +fixed packages (in the form of `PackageURLs `_). +There is no notion of version ranges here: all package versions must be explicitly specified. + +Because this concrete relationship is rarely available anywhere upstream, we have to *infer* +these values, thus the name. +As inferring something is not always perfect, an Inference also comes with a confidence score. + +Improver +^^^^^^^^^ + +All the Improvers must inherit from ``Improver`` superclass and implement the +``interesting_advisories`` property and the ``get_inferences`` method. + +Writing an improver +--------------------- + +Locate the Source File +^^^^^^^^^^^^^^^^^^^^^^^^ + +If the improver will be working on data imported by a specific importer, it will be located in +the same file at :file:`vulnerabilites/importers/{importer-name.py}`. 
Otherwise, if it is a +generic improver, create a new file :file:`vulnerabilites/improvers/{improver-name.py}`. + +Explore Package Managers (Optional) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If your Improver depends on the discrete versions of a package, the package managers' VersionAPI +located at :file:`vulnerabilites/package_managers.py` could come in handy. You will need to +instantiate the relevant ``VersionAPI`` in the improver's constructor and use it later in the +implemented methods. See an already implemented improver (NginxBasicImprover) for an example usage. + +Implement the ``interesting_advisories`` Property +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This property is intended to return a QuerySet of ``Advisory`` on which the ``Improver`` is +designed to work. + +For example, if the improver is designed to work on Advisories imported by ``ExampleImporter``, +the property can be implemented as + +.. code-block:: python + + class ExampleBasicImprover(Improver): + + @property + def interesting_advisories(self) -> QuerySet: + return Advisory.objects.filter(created_by=ExampleImporter.qualified_name) + +Implement the ``get_inferences`` Method +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The framework calls ``get_inferences`` method for every ``AdvisoryData`` that is obtained from +the ``Advisory`` QuerySet returned by the ``interesting_advisories`` property. + +It is expected to return an iterable of ``Inference`` objects for the given ``AdvisoryData``. To +avoid storing a lot of Inferences in memory, it is preferable to yield from this method. + +A very simple Improver that processes all Advisories to create the minimal relationships that can +be obtained by existing data can be found at :file:`vulnerabilites/improvers/default.py`, which is +an example of a generic improver. For a more sophisticated and targeted example, you can look +at an already implemented improver (e.g., :file:`vulnerabilites/importers/nginx.py`). 
+ +Improvers are not limited to improving discrete versions and may also improve ``aliases``. +One such example, improving the importer written in the :ref:`importer tutorial +`, is shown below. + +.. code-block:: python + + from datetime import datetime + from datetime import timezone + from typing import Iterable + + import requests + from django.db.models.query import QuerySet + from packageurl import PackageURL + from univers.version_range import NginxVersionRange + from univers.versions import SemverVersion + + from vulnerabilities.importer import AdvisoryData + from vulnerabilities.improver import MAX_CONFIDENCE + from vulnerabilities.improver import Improver + from vulnerabilities.improver import Inference + from vulnerabilities.models import Advisory + from vulnerabilities.severity_systems import SCORING_SYSTEMS + + + class ExampleImporter(Importer): + ... + + + class ExampleAliasImprover(Improver): + @property + def interesting_advisories(self) -> QuerySet: + return Advisory.objects.filter(created_by=ExampleImporter.qualified_name) + + def get_inferences(self, advisory_data) -> Iterable[Inference]: + for alias in advisory_data.aliases: + new_aliases = fetch_additional_aliases(alias) + aliases = new_aliases + [alias] + yield Inference(aliases=aliases, confidence=MAX_CONFIDENCE) + + + def fetch_additional_aliases(alias): + alias_map = { + "CVE-2021-23017": ["PYSEC-1337", "CERTIN-1337"], + "CVE-2021-1234": ["ANONSEC-1337", "CERTDES-1337"], + } + return alias_map.get(alias) + + +.. note:: + + | Use ``make valid`` to format your new code using black and isort automatically. + | Use ``make check`` to check for formatting errors. + +Register the Improver +^^^^^^^^^^^^^^^^^^^^^^ + +Finally, register your improver in the improver registry at +:file:`vulnerabilites/improvers/__init__.py`. + +.. 
code-block:: python + :emphasize-lines: 7 + + from vulnerabilities import importers + from vulnerabilities.improvers import default + + IMPROVERS_REGISTRY = [ + default.DefaultImprover, + importers.nginx.NginxBasicImprover, + importers.example.ExampleAliasImprover, + ] + + IMPROVERS_REGISTRY = {x.qualified_name: x for x in IMPROVERS_REGISTRY} + +Congratulations! You have written your first improver. + +Run Your First Improver +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If everything went well, you will see your improver in the list of available improvers. + +.. code-block:: console + :emphasize-lines: 6 + + $ ./manage.py improve --list + + Vulnerability data can be processed by these available improvers: + vulnerabilities.improvers.default.DefaultImprover + vulnerabilities.importers.nginx.NginxBasicImprover + vulnerabilities.importers.example.ExampleAliasImprover + +Before running the improver, make sure you have imported the data. An improver cannot improve if +there is nothing imported. + +.. code-block:: console + + $ ./manage.py import vulnerabilities.importers.example.ExampleImporter + + Importing data using vulnerabilities.importers.example.ExampleImporter + Successfully imported data using vulnerabilities.importers.example.ExampleImporter + +Now, run the improver. + +.. code-block:: console + + $ ./manage.py improve vulnerabilities.importers.example.ExampleAliasImprover + + Improving data using vulnerabilities.importers.example.ExampleAliasImprover + Successfully improved data using vulnerabilities.importers.example.ExampleAliasImprover + +See :ref:`command_line_interface` for command line usage instructions. + +Enable Debug Logging (Optional) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +For more visibility, turn on debug logs in :file:`vulnerablecode/settings.py`. + +.. 
code-block:: python + + DEBUG = True + LOGGING = { + 'version': 1, + 'disable_existing_loggers': False, + 'handlers': { + 'console': { + 'class': 'logging.StreamHandler', + }, + }, + 'root': { + 'handlers': ['console'], + 'level': 'DEBUG', + }, + } + +Invoke the improve command now and you will see (in a fresh database, after importing): + +.. code-block:: console + + $ ./manage.py improve vulnerabilities.importers.example.ExampleAliasImprover + + Improving data using vulnerabilities.importers.example.ExampleAliasImprover + Running improver: vulnerabilities.importers.example.ExampleAliasImprover + Improving advisory id: 1 + New alias for : PYSEC-1337 + New alias for : CVE-2021-23017 + New alias for : CERTIN-1337 + Improving advisory id: 2 + New alias for : CERTDES-1337 + New alias for : ANONSEC-1337 + New alias for : CVE-2021-1234 + Finished improving using vulnerabilities.importers.example.ExampleAliasImprover. + Successfully improved data using vulnerabilities.importers.example.ExampleAliasImprover + +.. note:: + + Even though CVE-2021-23017 and CVE-2021-1234 are not supplied by this improver, the output above shows them + because we left out running the ``DefaultImprover`` in the example. The ``DefaultImprover`` + inserts minimal data found via the importers in the database (here, the above two CVEs). Run + importer, DefaultImprover and then your improver in this sequence to avoid this anomaly. 
diff --git a/docs/source/images/pkg_details.png b/docs/source/images/pkg_details.png new file mode 100644 index 000000000..f1df78302 Binary files /dev/null and b/docs/source/images/pkg_details.png differ diff --git a/docs/source/images/pkg_search.png b/docs/source/images/pkg_search.png new file mode 100644 index 000000000..b152eaeaf Binary files /dev/null and b/docs/source/images/pkg_search.png differ diff --git a/docs/source/images/vuln_affected_packages.png b/docs/source/images/vuln_affected_packages.png new file mode 100644 index 000000000..d326b8af3 Binary files /dev/null and b/docs/source/images/vuln_affected_packages.png differ diff --git a/docs/source/images/vuln_details.png b/docs/source/images/vuln_details.png new file mode 100644 index 000000000..9de3459b5 Binary files /dev/null and b/docs/source/images/vuln_details.png differ diff --git a/docs/source/images/vuln_fixed_packages.png b/docs/source/images/vuln_fixed_packages.png new file mode 100644 index 000000000..428671790 Binary files /dev/null and b/docs/source/images/vuln_fixed_packages.png differ diff --git a/docs/source/images/vuln_search.png b/docs/source/images/vuln_search.png new file mode 100644 index 000000000..11cda2712 Binary files /dev/null and b/docs/source/images/vuln_search.png differ diff --git a/docs/source/index.rst b/docs/source/index.rst index 4f32eb472..69fcffe8c 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -21,19 +21,14 @@ In this documentation you will find information on: :caption: Getting Started introduction + user-interface installation + api + api-admin contributing faq misc -.. toctree:: - :maxdepth: 2 - :caption: Tutorial - - tutorial_add_new_importer - tutorial_add_new_improver - - .. toctree:: :maxdepth: 2 :caption: Reference Documentation @@ -43,7 +38,6 @@ In this documentation you will find information on: reference_framework_overview command-line-interface importers_link - api .. 
toctree:: :maxdepth: 1 diff --git a/docs/source/installation.rst b/docs/source/installation.rst index 4f098bc3f..638276c5d 100644 --- a/docs/source/installation.rst +++ b/docs/source/installation.rst @@ -69,11 +69,13 @@ to run on a different port than 8000. .. note:: To access a dockerized VulnerableCode app from a remote location, the ``ALLOWED_HOSTS`` - setting need to be provided in your ``docker.env`` file:: + and ``CSRF_TRUSTED_ORIGINS`` setting need to be provided in your ``docker.env`` file:: ALLOWED_HOSTS=.domain.com,127.0.0.1 + CSRF_TRUSTED_ORIGINS=https://*.domain.com,http://127.0.0.1 - Refer to `Django ALLOWED_HOSTS settings `_ + Refer to Django `ALLOWED_HOSTS settings `_ + and `CSRF_TRUSTED_ORIGINS settings `_ for more details. .. warning:: diff --git a/docs/source/misc.rst b/docs/source/misc.rst index 279357345..c2aab16a9 100644 --- a/docs/source/misc.rst +++ b/docs/source/misc.rst @@ -46,3 +46,20 @@ VulnerableCode first checks the file at `/etc/vulnerablecode/.env` and if not present, it will attempt to load a `.env` file from the checkout directory. The file at `/etc/vulnerablecode/.env` has precedence. + + +Throttling rate configuration +------------------------------- + +The default throttling settings are defined in ``settings.py``. + +To override the default settings, add env variables in ``.env`` file +define the settings there. 
For example:: + + VULNERABLECODE_ALL_VULNERABLE_PACKAGES_THROTTLING_RATE = '1000/hour' + VULNERABLECODE_BULK_SEARCH_PACKAGE_THROTTLING_RATE = '10/minute' + VULNERABLECODE_PACKAGES_SEARCH_THROTTLING_RATE = '1000/second' + VULNERABLECODE_VULNERABILITIES_SEARCH_THROTTLING_RATE = '1000/hour' + VULNERABLECODE_ALIASES_SEARCH_THROTTLING_RATE = '1000/hour' + VULNERABLECODE_CPE_SEARCH_THROTTLING_RATE = '10/minute' + VULNERABLECODE_BULK_SEARCH_CPE_THROTTLING_RATE = '10/minute' diff --git a/docs/source/tutorial_add_new_importer.rst b/docs/source/tutorial_add_new_importer.rst deleted file mode 100644 index 454b60c81..000000000 --- a/docs/source/tutorial_add_new_importer.rst +++ /dev/null @@ -1,301 +0,0 @@ -.. _tutorial_add_a_new_importer: - -Add a new importer -==================== - -This tutorial contains all the things one should know to quickly implement an importer. -Many internal details about importers can be found inside the -:file:`vulnerabilites/importer.py` file. -Make sure to go through :ref:`importer-overview` before you begin writing one. - -TL;DR -------- - -#. Create a new :file:`vulnerabilities/importers/{importer_name.py}` file. -#. Create a new importer subclass inheriting from the ``Importer`` superclass defined in - ``vulnerabilites.importer``. It is conventional to end an importer name with *Importer*. -#. Specify the importer license. -#. Implement the ``advisory_data`` method to process the data source you are - writing an importer for. -#. Add the newly created importer to the importers registry at - ``vulnerabilites/importers/__init__.py`` - -.. _tutorial_add_a_new_importer_prerequisites: - -Prerequisites --------------- - -Before writing an importer, it is important to familiarize yourself with the following concepts. - -PackageURL -^^^^^^^^^^^^ - -VulnerableCode extensively uses Package URLs to identify a package. See the -`PackageURL specification `_ and its `Python implementation -`_ for more details. - -**Example usage:** - -.. 
code:: python - - from packageurl import PackageURL - purl = PackageURL(name="ffmpeg", type="deb", version="1.2.3") - - -AdvisoryData -^^^^^^^^^^^^^ - -``AdvisoryData`` is an intermediate data format: -it is expected that your importer will convert the raw scraped data into ``AdvisoryData`` objects. -All the fields in ``AdvisoryData`` dataclass are optional; it is the importer's resposibility to -ensure that it contains meaningful information about a vulnerability. - -AffectedPackage -^^^^^^^^^^^^^^^^ - -``AffectedPackage`` data type is used to store a range of affected versions and a fixed version of a -given package. For all version-related data, `univers `_ library -is used. - -Univers -^^^^^^^^ - -`univers `_ is a Python implementation of the `vers specification `_. -It can parse and compare all the package versions and all the ranges, -from debian, npm, pypi, ruby and more. -It processes all the version range specs and expressions. - -Importer -^^^^^^^^^ - -All the generic importers need to implement the ``Importer`` class. -For ``Git`` or ``Oval`` data source, ``GitImporter`` or ``OvalImporter`` could be implemented. - -.. note:: - - ``GitImporter`` and ``OvalImporter`` need a complete rewrite. - Interested in :ref:`contributing` ? - -Writing an importer ---------------------- - -Create Importer Source File -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -All importers are located in the :file:`vulnerabilites/importers` package. -Create a new file to put your importer code in. -Generic importers are implemented by writing a subclass for the ``Importer`` superclass and -implementing the unimplemented methods. - -Specify the Importer License -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Importers scrape data off the internet. In order to make sure the data is useable, a license -must be provided. -Populate the ``spdx_license_expression`` with the appropriate value. -The SPDX license identifiers can be found at https://spdx.org/licenses/. - -.. 
note:: - An SPDX license identifier by itself is a valid licence expression. In case you need more complex - expressions, see https://spdx.github.io/spdx-spec/v2.3/SPDX-license-expressions/ - -Implement the ``advisory_data`` Method -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -The ``advisory_data`` method scrapes the advisories from the data source this importer is -targeted at. -It is required to return an *Iterable of AdvisoryData objects*, and thus it is a good idea to yield -from this method after creating each AdvisoryData object. - -At this point, an example importer will look like this: - -:file:`vulnerabilites/importers/example.py` - -.. code-block:: python - - from typing import Iterable - - from packageurl import PackageURL - - from vulnerabilities.importer import AdvisoryData - from vulnerabilities.importer import Importer - - - class ExampleImporter(Importer): - - spdx_license_expression = "BSD-2-Clause" - - def advisory_data(self) -> Iterable[AdvisoryData]: - return [] - -This importer is only a valid skeleton and does not import anything at all. - -Let us implement another dummy importer that actually imports some data. - -Here we have a ``dummy_package`` which follows ``NginxVersionRange`` and ``SemverVersion`` for -version management from `univers `_. - -.. note:: - - It is possible that the versioning scheme you are targetting has not yet been - implemented in the `univers `_ library. - If this is the case, you will need to head over there and implement one. - -.. 
code-block:: python - - from datetime import datetime - from datetime import timezone - from typing import Iterable - - import requests - from packageurl import PackageURL - from univers.version_range import NginxVersionRange - from univers.versions import SemverVersion - - from vulnerabilities.importer import AdvisoryData - from vulnerabilities.importer import AffectedPackage - from vulnerabilities.importer import Importer - from vulnerabilities.importer import Reference - from vulnerabilities.importer import VulnerabilitySeverity - from vulnerabilities.severity_systems import SCORING_SYSTEMS - - - class ExampleImporter(Importer): - - spdx_license_expression = "BSD-2-Clause" - - def advisory_data(self) -> Iterable[AdvisoryData]: - raw_data = fetch_advisory_data() - for data in raw_data: - yield parse_advisory_data(data) - - - def fetch_advisory_data(): - return [ - { - "id": "CVE-2021-23017", - "summary": "1-byte memory overwrite in resolver", - "advisory_severity": "medium", - "vulnerable": "0.6.18-1.20.0", - "fixed": "1.20.1", - "reference": "http://mailman.nginx.org/pipermail/nginx-announce/2021/000300.html", - "published_on": "14-02-2021 UTC", - }, - { - "id": "CVE-2021-1234", - "summary": "Dummy advisory", - "advisory_severity": "high", - "vulnerable": "0.6.18-1.20.0", - "fixed": "1.20.1", - "reference": "http://example.com/cve-2021-1234", - "published_on": "06-10-2021 UTC", - }, - ] - - - def parse_advisory_data(raw_data) -> AdvisoryData: - purl = PackageURL(type="example", name="dummy_package") - affected_version_range = NginxVersionRange.from_native(raw_data["vulnerable"]) - fixed_version = SemverVersion(raw_data["fixed"]) - affected_package = AffectedPackage( - package=purl, affected_version_range=affected_version_range, fixed_version=fixed_version - ) - severity = VulnerabilitySeverity( - system=SCORING_SYSTEMS["generic_textual"], value=raw_data["advisory_severity"] - ) - references = [Reference(url=raw_data["reference"], severities=[severity])] - 
date_published = datetime.strptime(raw_data["published_on"], "%d-%m-%Y %Z").replace( - tzinfo=timezone.utc - ) - - return AdvisoryData( - aliases=[raw_data["id"]], - summary=raw_data["summary"], - affected_packages=[affected_package], - references=references, - date_published=date_published, - ) - - -.. note:: - - | Use ``make valid`` to format your new code using black and isort automatically. - | Use ``make check`` to check for formatting errrors. - -Register the Importer -^^^^^^^^^^^^^^^^^^^^^^ - -Finally, register your importer in the importer registry at -:file:`vulnerabilites/importers/__init__.py` - -.. code-block:: python - :emphasize-lines: 1, 4 - - from vulnerabilities.importers import example - from vulnerabilities.importers import nginx - - IMPORTERS_REGISTRY = [nginx.NginxImporter, example.ExampleImporter] - - IMPORTERS_REGISTRY = {x.qualified_name: x for x in IMPORTERS_REGISTRY} - -Congratulations! You have written your first importer. - -Run Your First Importer -^^^^^^^^^^^^^^^^^^^^^^^^^^ - -If everything went well, you will see your importer in the list of available importers. - -.. code-block:: console - :emphasize-lines: 5 - - $ ./manage.py import --list - - Vulnerability data can be imported from the following importers: - vulnerabilities.importers.nginx.NginxImporter - vulnerabilities.importers.example.ExampleImporter - -Now, run the importer. - -.. code-block:: console - - $ ./manage.py import vulnerabilities.importers.example.ExampleImporter - - Importing data using vulnerabilities.importers.example.ExampleImporter - Successfully imported data using vulnerabilities.importers.example.ExampleImporter - -See :ref:`command_line_interface` for command line usage instructions. - -Enable Debug Logging (Optional) -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -For more visibility, turn on debug logs in :file:`vulnerablecode/settings.py`. - -.. 
code-block:: python - - DEBUG = True - LOGGING = { - 'version': 1, - 'disable_existing_loggers': False, - 'handlers': { - 'console': { - 'class': 'logging.StreamHandler', - }, - }, - 'root': { - 'handlers': ['console'], - 'level': 'DEBUG', - }, - } - -Invoke the import command now and you will see (in a fresh database): - -.. code-block:: console - - $ ./manage.py import vulnerabilities.importers.example.ExampleImporter - - Importing data using vulnerabilities.importers.example.ExampleImporter - Starting import for vulnerabilities.importers.example.ExampleImporter - [*] New Advisory with aliases: ['CVE-2021-23017'], created_by: vulnerabilities.importers.example.ExampleImporter - [*] New Advisory with aliases: ['CVE-2021-1234'], created_by: vulnerabilities.importers.example.ExampleImporter - Finished import for vulnerabilities.importers.example.ExampleImporter. Imported 2 advisories. - Successfully imported data using vulnerabilities.importers.example.ExampleImporter diff --git a/docs/source/tutorial_add_new_improver.rst b/docs/source/tutorial_add_new_improver.rst deleted file mode 100644 index 16fc7beab..000000000 --- a/docs/source/tutorial_add_new_improver.rst +++ /dev/null @@ -1,271 +0,0 @@ -.. _tutorial_add_a_new_improver: - -Add a new improver -==================== - -This tutorial contains all the things one should know to quickly -implement an improver. -Many internal details about improvers can be found inside the -:file:`vulnerabilites/improver.py` file. -Make sure to go through :ref:`improver-overview` before you begin writing one. - -TL;DR -------- - -#. Locate the importer that this improver will be improving data of at - :file:`vulnerabilities/importers/{importer_name.py}` file. -#. Create a new improver subclass inheriting from the ``Improver`` superclass defined in - ``vulnerabilites.improver``. It is conventional to end an improver name with *Improver*. -#. 
Implement the ``interesting_advisories`` property to return a QuerySet of imported data - (``Advisory``) you are interested in. -#. Implement the ``get_inferences`` method to return an iterable of ``Inference`` objects for the - given ``AdvisoryData``. -#. Add the newly created improver to the improvers registry at - ``vulnerabilites/improvers/__init__.py``. - -Prerequisites --------------- - -Before writing an improver, it is important to familiarize yourself with the following concepts. - -Importer -^^^^^^^^^^ - -Importers are responsible for scraping vulnerability data from various data sources without creating -a complete relational model between vulnerabilites and their fixes and storing them in a structured -fashion. These data are stored in the ``Advisory`` model and can be converted to an equivalent -``AdvisoryData`` for various use cases. -See :ref:`importer-overview` for a brief overview on importers. - -Importer Prerequisites -^^^^^^^^^^^^^^^^^^^^^^^ - -Improvers consume data produced by importers, and thus it is important to familiarize yourself with -:ref:`Importer Prerequisites `. - -Inference -^^^^^^^^^^^ - -Inferences express the contract between the improvers and the improve runner framework. -An inference is intended to contain data points about a vulnerability without any uncertainties, -which means that one inference will target one vulnerability with the specific relevant affected and -fixed packages (in the form of `PackageURLs `_). -There is no notion of version ranges here: all package versions must be explicitly specified. - -Because this concrete relationship is rarely available anywhere upstream, we have to *infer* -these values, thus the name. -As inferring something is not always perfect, an Inference also comes with a confidence score. - -Improver -^^^^^^^^^ - -All the Improvers must inherit from ``Improver`` superclass and implement the -``interesting_advisories`` property and the ``get_inferences`` method. 
- -Writing an improver ---------------------- - -Locate the Source File -^^^^^^^^^^^^^^^^^^^^^^^^ - -If the improver will be working on data imported by a specific importer, it will be located in -the same file at :file:`vulnerabilites/importers/{importer-name.py}`. Otherwise, if it is a -generic improver, create a new file :file:`vulnerabilites/improvers/{improver-name.py}`. - -Explore Package Managers (Optional) -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -If your Improver depends on the discrete versions of a package, the package managers' VersionAPI -located at :file:`vulnerabilites/package_managers.py` could come in handy. You will need to -instantiate the relevant ``VersionAPI`` in the improver's constructor and use it later in the -implemented methods. See an already implemented improver (NginxBasicImprover) for an example usage. - -Implement the ``interesting_advisories`` Property -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -This property is intended to return a QuerySet of ``Advisory`` on which the ``Improver`` is -designed to work. - -For example, if the improver is designed to work on Advisories imported by ``ExampleImporter``, -the property can be implemented as - -.. code-block:: python - - class ExampleBasicImprover(Improver): - - @property - def interesting_advisories(self) -> QuerySet: - return Advisory.objects.filter(created_by=ExampleImporter.qualified_name) - -Implement the ``get_inferences`` Method -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -The framework calls ``get_inferences`` method for every ``AdvisoryData`` that is obtained from -the ``Advisory`` QuerySet returned by the ``interesting_advisories`` property. - -It is expected to return an iterable of ``Inference`` objects for the given ``AdvisoryData``. To -avoid storing a lot of Inferences in memory, it is preferable to yield from this method. 
- -A very simple Improver that processes all Advisories to create the minimal relationships that can -be obtained by existing data can be found at :file:`vulnerabilites/improvers/default.py`, which is -an example of a generic improver. For a more sophisticated and targeted example, you can look -at an already implemented improver (e.g., :file:`vulnerabilites/importers/nginx.py`). - -Improvers are not limited to improving discrete versions and may also improve ``aliases``. -One such example, improving the importer written in the :ref:`importer tutorial -`, is shown below. - -.. code-block:: python - - from datetime import datetime - from datetime import timezone - from typing import Iterable - - import requests - from django.db.models.query import QuerySet - from packageurl import PackageURL - from univers.version_range import NginxVersionRange - from univers.versions import SemverVersion - - from vulnerabilities.importer import AdvisoryData - from vulnerabilities.improver import MAX_CONFIDENCE - from vulnerabilities.improver import Improver - from vulnerabilities.improver import Inference - from vulnerabilities.models import Advisory - from vulnerabilities.severity_systems import SCORING_SYSTEMS - - - class ExampleImporter(Importer): - ... - - - class ExampleAliasImprover(Improver): - @property - def interesting_advisories(self) -> QuerySet: - return Advisory.objects.filter(created_by=ExampleImporter.qualified_name) - - def get_inferences(self, advisory_data) -> Iterable[Inference]: - for alias in advisory_data.aliases: - new_aliases = fetch_additional_aliases(alias) - aliases = new_aliases + [alias] - yield Inference(aliases=aliases, confidence=MAX_CONFIDENCE) - - - def fetch_additional_aliases(alias): - alias_map = { - "CVE-2021-23017": ["PYSEC-1337", "CERTIN-1337"], - "CVE-2021-1234": ["ANONSEC-1337", "CERTDES-1337"], - } - return alias_map.get(alias) - - -.. note:: - - | Use ``make valid`` to format your new code using black and isort automatically. 
- | Use ``make check`` to check for formatting errrors. - -Register the Improver -^^^^^^^^^^^^^^^^^^^^^^ - -Finally, register your improver in the improver registry at -:file:`vulnerabilites/improvers/__init__.py`. - -.. code-block:: python - :emphasize-lines: 7 - - from vulnerabilities import importers - from vulnerabilities.improvers import default - - IMPROVERS_REGISTRY = [ - default.DefaultImprover, - importers.nginx.NginxBasicImprover, - importers.example.ExampleAliasImprover, - ] - - IMPROVERS_REGISTRY = {x.qualified_name: x for x in IMPROVERS_REGISTRY} - -Congratulations! You have written your first improver. - -Run Your First Improver -^^^^^^^^^^^^^^^^^^^^^^^^^^ - -If everything went well, you will see your improver in the list of available improvers. - -.. code-block:: console - :emphasize-lines: 6 - - $ ./manage.py improve --list - - Vulnerability data can be processed by these available improvers: - vulnerabilities.improvers.default.DefaultImprover - vulnerabilities.importers.nginx.NginxBasicImprover - vulnerabilities.importers.example.ExampleAliasImprover - -Before running the improver, make sure you have imported the data. An improver cannot improve if -there is nothing imported. - -.. code-block:: console - - $ ./manage.py import vulnerabilities.importers.example.ExampleImporter - - Importing data using vulnerabilities.importers.example.ExampleImporter - Successfully imported data using vulnerabilities.importers.example.ExampleImporter - -Now, run the improver. - -.. code-block:: console - - $ ./manage.py improve vulnerabilities.importers.example.ExampleAliasImprover - - Improving data using vulnerabilities.importers.example.ExampleAliasImprover - Successfully improved data using vulnerabilities.importers.example.ExampleAliasImprover - -See :ref:`command_line_interface` for command line usage instructions. 
- -Enable Debug Logging (Optional) -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -For more visibility, turn on debug logs in :file:`vulnerablecode/settings.py`. - -.. code-block:: python - - DEBUG = True - LOGGING = { - 'version': 1, - 'disable_existing_loggers': False, - 'handlers': { - 'console': { - 'class': 'logging.StreamHandler', - }, - }, - 'root': { - 'handlers': ['console'], - 'level': 'DEBUG', - }, - } - -Invoke the improve command now and you will see (in a fresh database, after importing): - -.. code-block:: console - - $ ./manage.py improve vulnerabilities.importers.example.ExampleAliasImprover - - Improving data using vulnerabilities.importers.example.ExampleAliasImprover - Running improver: vulnerabilities.importers.example.ExampleAliasImprover - Improving advisory id: 1 - New alias for : PYSEC-1337 - New alias for : CVE-2021-23017 - New alias for : CERTIN-1337 - Improving advisory id: 2 - New alias for : CERTDES-1337 - New alias for : ANONSEC-1337 - New alias for : CVE-2021-1234 - Finished improving using vulnerabilities.importers.example.ExampleAliasImprover. - Successfully improved data using vulnerabilities.importers.example.ExampleAliasImprover - -.. note:: - - Even though CVE-2021-23017 and CVE-2021-1234 are not supplied by this improver, the output above shows them - because we left out running the ``DefaultImprover`` in the example. The ``DefaultImprover`` - inserts minimal data found via the importers in the database (here, the above two CVEs). Run - importer, DefaultImprover and then your improver in this sequence to avoid this anomaly. diff --git a/docs/source/user-interface.rst b/docs/source/user-interface.rst new file mode 100644 index 000000000..251896c8a --- /dev/null +++ b/docs/source/user-interface.rst @@ -0,0 +1,73 @@ +.. _user-interface: + +User Interface +================ + +.. _pkg-search: + +Search by packages +------------------ + +The search by packages is a very powerful feature of +VulnerableCode. 
It allows you to search for packages by the +package URL or purl prefix fragment such as +``pkg:pypi`` or by package name. + +The search by packages is available at the following URL: + + `https://public.vulnerablecode.io/packages/search `_ + +How to search by packages: + + 1. Go to the URL: `https://public.vulnerablecode.io/packages/search `_ + 2. Enter the package URL or purl prefix fragment such as ``pkg:pypi`` + or the package name in the search box. + 3. Click on the search button. + +The search results will be displayed in the table below the search box. + + .. image:: images/pkg_search.png + +Click on the package URL to view the package details. + + .. image:: images/pkg_details.png + + +.. _vuln-search: + +Search by vulnerabilities +--------------------------- + +The search by vulnerabilities is a very powerful feature of +VulnerableCode. It allows you to search for vulnerabilities by the +VCID itself. It also allows you to search for +vulnerabilities by the CVE, GHSA, CPEs etc. or by the +fragment of these identifiers like ``CVE-2021``. + +The search by vulnerabilities is available at the following URL: + + `https://public.vulnerablecode.io/vulnerabilities/search `_ + +How to search by vulnerabilities: + + 1. Go to the URL: `https://public.vulnerablecode.io/vulnerabilities/search `_ + 2. Enter the VCID, CVE, GHSA, CPEs etc. in the search box. + 3. Click on the search button. + +The search results will be displayed in the table below the search box. + + .. image:: images/vuln_search.png + +Click on the VCID to view the vulnerability details. + + .. image:: images/vuln_details.png + +The Affected packages tab shows the list of packages affected by the +vulnerability. + + .. image:: images/vuln_affected_packages.png + +The Fixed by packages tab shows the list of packages that fix the +vulnerability. + + .. 
image:: images/vuln_fixed_packages.png diff --git a/requirements.txt b/requirements.txt index 93587da8d..2baddad7f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ aiosignal==1.2.0 alabaster==0.7.12 -asgiref==3.5.0 +asgiref==3.5.2 asttokens==2.0.5 async-timeout==4.0.2 attrs==21.4.0 @@ -11,16 +11,16 @@ beautifulsoup4==4.10.0 binaryornot==0.4.4 black==22.3.0 boolean.py==3.8 -certifi==2022.12.7 +certifi==2023.7.22 cffi==1.15.0 chardet==4.0.0 charset-normalizer==2.0.12 click==8.1.2 -cryptography==36.0.2 +cryptography==41.0.6 decorator==5.1.1 defusedxml==0.7.1 distro==1.7.0 -Django==4.0.7 +Django==4.1.13 django-crispy-forms==1.10.0 django-environ==0.8.1 django-filter==21.1 @@ -28,7 +28,6 @@ django-widget-tweaks==1.4.12 djangorestframework==3.13.1 doc8==0.11.1 docker==5.0.3 -docker-compose==1.29.2 dockerpty==0.4.1 docopt==0.6.2 docutils==0.17.1 @@ -36,13 +35,13 @@ executing==0.8.3 freezegun==1.2.1 frozenlist==1.3.0 gitdb==4.0.9 -GitPython==3.1.30 +GitPython==3.1.37 gunicorn==20.1.0 idna==3.3 imagesize==1.3.0 importlib-metadata==4.11.3 iniconfig==1.1.1 -ipython==8.0.1 +ipython==8.10.0 isort==5.10.1 jedi==0.18.1 Jinja2==3.1.1 @@ -65,14 +64,14 @@ pickleshare==0.7.5 platformdirs==2.5.1 pluggy==1.0.0 pprintpp==0.4.0 -prompt-toolkit==3.0.29 +prompt-toolkit==3.0.30 psycopg2-binary==2.9.3 ptyprocess==0.7.0 pure-eval==0.2.2 py==1.11.0 pycodestyle==2.8.0 pycparser==2.21 -Pygments==2.11.2 +Pygments==2.15.0 PyNaCl==1.5.0 pyparsing==3.0.7 pyrsistent==0.18.1 @@ -81,10 +80,10 @@ pytest-django==4.5.2 python-dateutil==2.8.2 python-dotenv==0.20.0 pytz==2022.1 -PyYAML==5.4.1 -requests==2.27.1 +PyYAML==6.0.1 +requests==2.31.0 restructuredtext-lint==1.4.0 -saneyaml==0.5.2 +saneyaml==0.6.0 semantic-version==2.9.0 six==1.16.0 smmap==5.0.0 @@ -99,7 +98,7 @@ sphinxcontrib-htmlhelp==2.0.0 sphinxcontrib-jsmath==1.0.1 sphinxcontrib-qthelp==1.0.3 sphinxcontrib-serializinghtml==1.1.5 -sqlparse==0.4.2 +sqlparse==0.4.4 stack-data==0.2.0 stevedore==3.5.0 texttable==1.6.4 @@ -107,8 
+106,8 @@ toml==0.10.2 tomli==2.0.1 traitlets==5.1.1 typing_extensions==4.1.1 -univers==30.9.1 -urllib3==1.26.9 +univers==30.10.0 +urllib3==1.26.18 wcwidth==0.2.5 websocket-client==0.59.0 yarl==1.7.2 diff --git a/setup.cfg b/setup.cfg index 651d6d6fa..f1019f1dc 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = vulnerablecode -version = 32.0.0rc3 +version = 33.6.3 license = Apache-2.0 AND CC-BY-SA-4.0 # description must be on ONE line https://github.com/pypa/setuptools/issues/1390 @@ -70,12 +70,12 @@ install_requires = #essentials packageurl-python>=0.10.5rc1 - univers>=30.9.1 + univers>=30.10.0 license-expression>=21.6.14 # file and data formats binaryornot>=0.4.4 - saneyaml>=0.5.2 + saneyaml>=0.6.0 beautifulsoup4>=4.9.3 python-dateutil>=2.8.1 toml>=0.10.2 @@ -91,6 +91,11 @@ install_requires = requests>=2.25.1 fetchcode>=0.2.0 + #vulntotal + python-dotenv + texttable + + [options.extras_require] dev = # Validation @@ -107,8 +112,7 @@ dev = pytest-django>=4.5.2 freezegun>=1.1.0 # misc - docker-compose - ipython==8.0.1 + ipython==8.10.0 # used for testing commoncode # debug diff --git a/vulnerabilities/api.py b/vulnerabilities/api.py index af8e5d889..86f800e71 100644 --- a/vulnerabilities/api.py +++ b/vulnerabilities/api.py @@ -16,6 +16,8 @@ from rest_framework import viewsets from rest_framework.decorators import action from rest_framework.response import Response +from rest_framework.throttling import AnonRateThrottle +from rest_framework.throttling import UserRateThrottle from vulnerabilities.models import Alias from vulnerabilities.models import Package @@ -46,11 +48,30 @@ class MinimalPackageSerializer(serializers.HyperlinkedModelSerializer): Used for nesting inside vulnerability focused APIs. 
""" + def get_affected_vulnerabilities(self, package): + parent_affected_vulnerabilities = package.fixed_package_details.get("vulnerabilities") or [] + + affected_vulnerabilities = [ + self.get_vulnerability(vuln) for vuln in parent_affected_vulnerabilities + ] + + return affected_vulnerabilities + + def get_vulnerability(self, vuln): + affected_vulnerability = {} + + vulnerability = vuln.get("vulnerability") + if vulnerability: + affected_vulnerability["vulnerability"] = vulnerability.vulnerability_id + return affected_vulnerability + + affected_by_vulnerabilities = serializers.SerializerMethodField("get_affected_vulnerabilities") + purl = serializers.CharField(source="package_url") class Meta: model = Package - fields = ["url", "purl", "is_vulnerable"] + fields = ["url", "purl", "is_vulnerable", "affected_by_vulnerabilities"] class MinimalVulnerabilitySerializer(serializers.HyperlinkedModelSerializer): @@ -97,7 +118,6 @@ class Meta: class VulnerabilitySerializer(serializers.HyperlinkedModelSerializer): - fixed_packages = MinimalPackageSerializer( many=True, source="filtered_fixed_packages", read_only=True ) @@ -124,10 +144,19 @@ class PackageSerializer(serializers.HyperlinkedModelSerializer): Lookup software package using Package URLs """ - def to_representation(self, instance): - data = super().to_representation(instance) - data["unresolved_vulnerabilities"] = data["affected_by_vulnerabilities"] - return data + next_non_vulnerable_version = serializers.SerializerMethodField("get_next_non_vulnerable") + + def get_next_non_vulnerable(self, package): + next_non_vulnerable = package.fixed_package_details.get("next_non_vulnerable", None) + if next_non_vulnerable: + return next_non_vulnerable.version + + latest_non_vulnerable_version = serializers.SerializerMethodField("get_latest_non_vulnerable") + + def get_latest_non_vulnerable(self, package): + latest_non_vulnerable = package.fixed_package_details.get("latest_non_vulnerable", None) + if latest_non_vulnerable: + 
return latest_non_vulnerable.version purl = serializers.CharField(source="package_url") @@ -137,7 +166,7 @@ def to_representation(self, instance): def get_fixed_packages(self, package): """ - Return a queryset of all packages that fixes a vulnerability with + Return a queryset of all packages that fix a vulnerability with same type, namespace, name, subpath and qualifiers of the `package` """ return Package.objects.filter( @@ -152,7 +181,7 @@ def get_fixed_packages(self, package): def get_vulnerabilities_for_a_package(self, package, fix) -> dict: """ Return a mapping of vulnerabilities data related to the given `package`. - Return vulnerabilities that affects the `package` if given `fix` flag is False, + Return vulnerabilities that affect the `package` if given `fix` flag is False, otherwise return vulnerabilities fixed by the `package`. """ fixed_packages = self.get_fixed_packages(package=package) @@ -178,9 +207,23 @@ def get_fixed_vulnerabilities(self, package) -> dict: def get_affected_vulnerabilities(self, package) -> dict: """ - Return a mapping of vulnerabilities that affects the given `package`. + Return a mapping of vulnerabilities that affect the given `package` (including packages that + fix each vulnerability and whose version is greater than the `package` version). 
""" - return self.get_vulnerabilities_for_a_package(package=package, fix=False) + excluded_purls = [] + package_vulnerabilities = self.get_vulnerabilities_for_a_package(package=package, fix=False) + + for vuln in package_vulnerabilities: + for pkg in vuln["fixed_packages"]: + real_purl = PackageURL.from_string(pkg["purl"]) + if package.version_class(real_purl.version) <= package.current_version: + excluded_purls.append(pkg) + + vuln["fixed_packages"] = [ + pkg for pkg in vuln["fixed_packages"] if pkg not in excluded_purls + ] + + return package_vulnerabilities class Meta: model = Package @@ -193,6 +236,8 @@ class Meta: "version", "qualifiers", "subpath", + "next_non_vulnerable_version", + "latest_non_vulnerable_version", "affected_by_vulnerabilities", "fixing_vulnerabilities", ] @@ -236,11 +281,10 @@ class PackageViewSet(viewsets.ReadOnlyModelViewSet): serializer_class = PackageSerializer filter_backends = (filters.DjangoFilterBackend,) filterset_class = PackageFilterSet - throttle_classes = [StaffUserRateThrottle] - throttle_scope = "packages" + throttle_classes = [StaffUserRateThrottle, AnonRateThrottle] # TODO: Fix the swagger documentation for this endpoint - @action(detail=False, methods=["post"], throttle_scope="bulk_search_packages") + @action(detail=False, methods=["post"]) def bulk_search(self, request): """ Lookup for vulnerable packages using many Package URLs at once. @@ -294,7 +338,7 @@ def bulk_search(self, request): vulnerable_purls = [str(package.package_url) for package in vulnerable_purls] return Response(data=vulnerable_purls) - @action(detail=False, methods=["get"], throttle_scope="vulnerable_packages") + @action(detail=False, methods=["get"]) def all(self, request): """ Return the Package URLs of all packages known to be vulnerable. 
@@ -346,8 +390,7 @@ def get_queryset(self): serializer_class = VulnerabilitySerializer filter_backends = (filters.DjangoFilterBackend,) filterset_class = VulnerabilityFilterSet - throttle_classes = [StaffUserRateThrottle] - throttle_scope = "vulnerabilities" + throttle_classes = [StaffUserRateThrottle, AnonRateThrottle] class CPEFilterSet(filters.FilterSet): @@ -368,11 +411,10 @@ class CPEViewSet(viewsets.ReadOnlyModelViewSet): ).distinct() serializer_class = VulnerabilitySerializer filter_backends = (filters.DjangoFilterBackend,) - throttle_classes = [StaffUserRateThrottle] + throttle_classes = [StaffUserRateThrottle, AnonRateThrottle] filterset_class = CPEFilterSet - throttle_scope = "cpes" - @action(detail=False, methods=["post"], throttle_scope="bulk_search_cpes") + @action(detail=False, methods=["post"]) def bulk_search(self, request): """ Lookup for vulnerabilities using many CPEs at once. @@ -414,5 +456,4 @@ class AliasViewSet(viewsets.ReadOnlyModelViewSet): serializer_class = VulnerabilitySerializer filter_backends = (filters.DjangoFilterBackend,) filterset_class = AliasFilterSet - throttle_classes = [StaffUserRateThrottle] - throttle_scope = "aliases" + throttle_classes = [StaffUserRateThrottle, AnonRateThrottle] diff --git a/vulnerabilities/import_runner.py b/vulnerabilities/import_runner.py index cb6dcfc97..047710c2f 100644 --- a/vulnerabilities/import_runner.py +++ b/vulnerabilities/import_runner.py @@ -9,12 +9,26 @@ import datetime import logging +from traceback import format_exc as traceback_format_exc from typing import Iterable from typing import List +from django.core.exceptions import ValidationError +from django.db import transaction + from vulnerabilities.importer import AdvisoryData from vulnerabilities.importer import Importer +from vulnerabilities.improver import Inference +from vulnerabilities.improvers.default import DefaultImporter from vulnerabilities.models import Advisory +from vulnerabilities.models import Alias +from 
vulnerabilities.models import Package +from vulnerabilities.models import PackageRelatedVulnerability +from vulnerabilities.models import Vulnerability +from vulnerabilities.models import VulnerabilityReference +from vulnerabilities.models import VulnerabilityRelatedReference +from vulnerabilities.models import VulnerabilitySeverity +from vulnerabilities.models import Weakness logger = logging.getLogger(__name__) @@ -41,38 +55,287 @@ def run(self) -> None: importer_class = self.importer_class logger.info(f"Starting import for {importer_name}") advisory_datas = importer_class().advisory_data() - count = process_advisories(advisory_datas=advisory_datas, importer_name=importer_name) + count = self.process_advisories(advisory_datas=advisory_datas, importer_name=importer_name) logger.info(f"Finished import for {importer_name}. Imported {count} advisories.") + def do_import(self, advisories) -> None: + advisory_importer = DefaultImporter(advisories=advisories) + logger.info(f"Running importer: {advisory_importer.qualified_name}") + importer_name = advisory_importer.qualified_name + advisories = [] + for advisory in advisory_importer.interesting_advisories: + if advisory.date_imported: + continue + logger.info(f"Processing advisory: {advisory!r}") + try: + inferences = advisory_importer.get_inferences( + advisory_data=advisory.to_advisory_data() + ) + process_inferences( + inferences=inferences, + advisory=advisory, + improver_name=importer_name, + ) + except Exception as e: + logger.info(f"Failed to process advisory: {advisory!r} with error {e!r}") + logger.info("Finished importing using %s.", advisory_importer.__class__.qualified_name) + + def process_advisories( + self, advisory_datas: Iterable[AdvisoryData], importer_name: str + ) -> List: + """ + Insert advisories into the database + Return the number of inserted advisories. 
+ """ + count = 0 + advisories = [] + for data in advisory_datas: + try: + obj, created = Advisory.objects.get_or_create( + aliases=data.aliases, + summary=data.summary, + affected_packages=[pkg.to_dict() for pkg in data.affected_packages], + references=[ref.to_dict() for ref in data.references], + date_published=data.date_published, + weaknesses=data.weaknesses, + defaults={ + "created_by": importer_name, + "date_collected": datetime.datetime.now(tz=datetime.timezone.utc), + }, + ) + if not obj.date_imported: + advisories.append(obj) + except Exception as e: + logger.error( + f"Error while processing {data!r} with aliases {data.aliases!r}: {e!r} \n {traceback_format_exc()}" + ) + continue + if created: + logger.info( + f"[*] New Advisory with aliases: {obj.aliases!r}, created_by: {obj.created_by}" + ) + count += 1 + else: + logger.debug(f"Advisory with aliases: {obj.aliases!r} already exists.") + try: + self.do_import(advisories) + except Exception as e: + logger.error( + f"Error while processing advisories from {importer_name!r}: {e!r} \n {traceback_format_exc()}" + ) + return count + -def process_advisories(advisory_datas: Iterable[AdvisoryData], importer_name: str) -> List: +@transaction.atomic +def process_inferences(inferences: List[Inference], advisory: Advisory, improver_name: str): """ - Insert advisories into the database - Return the number of inserted advisories. + Return number of inferences processed. + An atomic transaction that updates both the Advisory (e.g. date_imported) + and processes the given inferences to create or update corresponding + database fields. + + This avoids failing the entire improver when only a single inference is + erroneous. Also, the atomic transaction for every advisory and its + inferences makes sure that date_imported of advisory is consistent. 
""" - count = 0 - for data in advisory_datas: - # https://nvd.nist.gov/vuln/detail/CVE-2013-4314 - # https://github.com/cms-dev/cms/issues/888#issuecomment-516977572 - data.summary = data.summary.replace("\x00", "\uFFFD") - obj, created = Advisory.objects.get_or_create( - aliases=data.aliases, - summary=data.summary, - affected_packages=[pkg.to_dict() for pkg in data.affected_packages], - references=[ref.to_dict() for ref in data.references], - date_published=data.date_published, - weaknesses=data.weaknesses, - defaults={ - "created_by": importer_name, - "date_collected": datetime.datetime.now(tz=datetime.timezone.utc), - }, + inferences_processed_count = 0 + + if not inferences: + logger.warning(f"Nothing to improve. Source: {improver_name} Advisory id: {advisory.id}") + return inferences_processed_count + + logger.info(f"Improving advisory id: {advisory.id}") + + for inference in inferences: + vulnerability = get_or_create_vulnerability_and_aliases( + vulnerability_id=inference.vulnerability_id, + alias_names=inference.aliases, + summary=inference.summary, ) - if created: - logger.info( - f"[*] New Advisory with aliases: {obj.aliases!r}, created_by: {obj.created_by}" + + if not vulnerability: + logger.warning(f"Unable to get vulnerability for inference: {inference!r}") + continue + + for ref in inference.references: + + reference = VulnerabilityReference.objects.get_or_none( + reference_id=ref.reference_id, + url=ref.url, + ) + + if not reference: + reference = create_valid_vulnerability_reference( + reference_id=ref.reference_id, + url=ref.url, + ) + if not reference: + continue + + VulnerabilityRelatedReference.objects.update_or_create( + reference=reference, + vulnerability=vulnerability, ) - count += 1 + + for severity in ref.severities: + _vs, updated = VulnerabilitySeverity.objects.update_or_create( + scoring_system=severity.system.identifier, + reference=reference, + defaults={ + "value": str(severity.value), + "scoring_elements": 
str(severity.scoring_elements), + }, + ) + if updated: + logger.info( + f"Severity updated for reference {ref!r} to value: {severity.value!r} " + f"and scoring_elements: {severity.scoring_elements!r}" + ) + + for affected_purl in inference.affected_purls or []: + vulnerable_package = Package.objects.get_or_create_from_purl(purl=affected_purl) + PackageRelatedVulnerability( + vulnerability=vulnerability, + package=vulnerable_package, + created_by=improver_name, + confidence=inference.confidence, + fix=False, + ).update_or_create() + + if inference.fixed_purl: + fixed_package = Package.objects.get_or_create_from_purl(purl=inference.fixed_purl) + PackageRelatedVulnerability( + vulnerability=vulnerability, + package=fixed_package, + created_by=improver_name, + confidence=inference.confidence, + fix=True, + ).update_or_create() + + if inference.weaknesses and vulnerability: + for cwe_id in inference.weaknesses: + cwe_obj, created = Weakness.objects.get_or_create(cwe_id=cwe_id) + cwe_obj.vulnerabilities.add(vulnerability) + cwe_obj.save() + inferences_processed_count += 1 + + advisory.date_imported = datetime.datetime.now(tz=datetime.timezone.utc) + advisory.save() + return inferences_processed_count + + +def create_valid_vulnerability_reference(url, reference_id=None): + """ + Create and return a new validated VulnerabilityReference from a + ``url`` and ``reference_id``. + Return None and log a warning if this is not a valid reference. 
+ """ + reference = VulnerabilityReference( + reference_id=reference_id, + url=url, + ) + + try: + reference.full_clean() + except ValidationError as e: + logger.warning(f"Invalid vulnerability reference: {reference!r}: {e}") + return + + reference.save() + return reference + + +def get_or_create_vulnerability_and_aliases( + aliases: List[str], vulnerability_id=None, summary=None +): + """ + Get or create vulnerabilitiy and aliases such that all existing and new + aliases point to the same vulnerability + """ + aliases = set(alias.strip() for alias in aliases if alias and alias.strip()) + new_alias_names, existing_vulns = get_vulns_for_aliases_and_get_new_aliases(aliases) + + # All aliases must point to the same vulnerability + vulnerability = None + if existing_vulns: + if len(existing_vulns) != 1: + vcids = ", ".join(v.vulnerability_id for v in existing_vulns) + logger.error( + f"Cannot create vulnerability. " + f"Aliases {aliases} already exist and point " + f"to multiple vulnerabilities {vcids}." + ) + return else: - logger.debug(f"Advisory with aliases: {obj.aliases!r} already exists. Skipped.") + vulnerability = existing_vulns.pop() + + if vulnerability_id and vulnerability.vulnerability_id != vulnerability_id: + logger.error( + f"Cannot create vulnerability. " + f"Aliases {aliases} already exist and point to a different " + f"vulnerability {vulnerability} than the requested " + f"vulnerability {vulnerability_id}." + ) + return + + if vulnerability_id and not vulnerability: + try: + vulnerability = Vulnerability.objects.get(vulnerability_id=vulnerability_id) + except Vulnerability.DoesNotExist: + logger.error(f"Cannot get requested vulnerability {vulnerability_id}.") + return + if vulnerability: + # TODO: We should keep multiple summaries, one for each advisory + # if summary and summary != vulnerability.summary: + # logger.warning( + # f"Inconsistent summary for {vulnerability.vulnerability_id}. 
" + # f"Existing: {vulnerability.summary!r}, provided: {summary!r}" + # ) + associate_vulnerability_with_aliases(vulnerability=vulnerability, aliases=new_alias_names) + else: + try: + vulnerability = create_vulnerability_and_add_aliases( + aliases=new_alias_names, summary=summary + ) + except Exception as e: + logger.error( + f"Cannot create vulnerability with summary {summary!r} and {new_alias_names!r} {e!r}.\n{traceback_format_exc()}." + ) + return + + return vulnerability + + +def get_vulns_for_aliases_and_get_new_aliases(aliases): + """ + Return ``new_aliases`` that are not in the database and + ``existing_vulns`` that point to the given ``aliases``. + """ + new_aliases = set(aliases) + existing_vulns = set() + for alias in Alias.objects.filter(alias__in=aliases): + existing_vulns.add(alias.vulnerability) + new_aliases.remove(alias.alias) + return new_aliases, existing_vulns + + +@transaction.atomic +def create_vulnerability_and_add_aliases(aliases, summary): + """ + Return a new ``vulnerability`` created with ``summary`` + and associate the ``vulnerability`` with ``aliases``. + Raise exception if no alias is associated with the ``vulnerability``. 
+ """ + vulnerability = Vulnerability(summary=summary) + vulnerability.save() + associate_vulnerability_with_aliases(aliases, vulnerability) + if not vulnerability.aliases.count(): + raise Exception(f"Vulnerability {vulnerability.vcid} must have one or more aliases") + return vulnerability + - return count +def associate_vulnerability_with_aliases(aliases, vulnerability): + for alias_name in aliases: + alias = Alias(alias=alias_name, vulnerability=vulnerability) + alias.save() + logger.info(f"New alias for {vulnerability!r}: {alias_name}") diff --git a/vulnerabilities/importer.py b/vulnerabilities/importer.py index 0bc1b788d..2b4d73b2d 100644 --- a/vulnerabilities/importer.py +++ b/vulnerabilities/importer.py @@ -24,6 +24,7 @@ import pytz from dateutil import parser as dateparser +from fetchcode.vcs import VCSResponse from fetchcode.vcs import fetch_via_vcs from license_expression import Licensing from packageurl import PackageURL @@ -187,7 +188,7 @@ def merge( purls.add(pkg.package) if len(purls) > 1: raise UnMergeablePackageError("Cannot merge with different purls", purls) - return purls.pop(), sorted(affected_version_ranges), sorted(fixed_versions) + return purls.pop(), list(affected_version_ranges), sorted(fixed_versions) def to_dict(self): """ @@ -251,6 +252,16 @@ class AdvisoryData: def __post_init__(self): if self.date_published and not self.date_published.tzinfo: logger.warning(f"AdvisoryData with no tzinfo: {self!r}") + if self.summary: + self.summary = self.clean_summary(self.summary) + + def clean_summary(self, summary): + # https://nvd.nist.gov/vuln/detail/CVE-2013-4314 + # https://github.com/cms-dev/cms/issues/888#issuecomment-516977572 + summary = summary.strip() + if summary: + summary = summary.replace("\x00", "\uFFFD") + return summary def to_dict(self): return { @@ -288,6 +299,10 @@ class InvalidSPDXLicense(Exception): pass +class ForkError(Exception): + pass + + class Importer: """ An Importer collects data from various upstreams and returns 
corresponding AdvisoryData objects @@ -297,7 +312,7 @@ class Importer: spdx_license_expression = "" license_url = "" notice = "" - vcs_response = None + vcs_response: VCSResponse = None def __init__(self): if not self.spdx_license_expression: @@ -324,47 +339,18 @@ def advisory_data(self) -> Iterable[AdvisoryData]: raise NotImplementedError def clone(self, repo_url): + """ + Clone the repo at repo_url and return the VCSResponse object + """ try: self.vcs_response = fetch_via_vcs(repo_url) + return self.vcs_response except Exception as e: msg = f"Failed to fetch {repo_url} via vcs: {e}" logger.error(msg) raise ForkError(msg) from e -class ForkError(Exception): - pass - - -class GitImporter(Importer): - def __init__(self, repo_url): - super().__init__() - self.repo_url = repo_url - self.vcs_response = None - - def __enter__(self): - super().__enter__() - self.clone() - return self - - def __exit__(self): - self.vcs_response.delete() - - def clone(self): - try: - self.vcs_response = fetch_via_vcs(self.repo_url) - except Exception as e: - msg = f"Failed to fetch {self.repo_url} via vcs: {e}" - logger.error(msg) - raise ForkError(msg) from e - - def advisory_data(self) -> Iterable[AdvisoryData]: - """ - Return AdvisoryData objects corresponding to the data being imported - """ - raise NotImplementedError - - # TODO: Needs rewrite class OvalImporter(Importer): """ diff --git a/vulnerabilities/importers/__init__.py b/vulnerabilities/importers/__init__.py index 4a2fd6ac7..add6967f8 100644 --- a/vulnerabilities/importers/__init__.py +++ b/vulnerabilities/importers/__init__.py @@ -25,6 +25,7 @@ from vulnerabilities.importers import npm from vulnerabilities.importers import nvd from vulnerabilities.importers import openssl +from vulnerabilities.importers import oss_fuzz from vulnerabilities.importers import postgresql from vulnerabilities.importers import project_kb_msr2019 from vulnerabilities.importers import pypa @@ -37,21 +38,21 @@ from vulnerabilities.importers import xen 
IMPORTERS_REGISTRY = [ + nvd.NVDImporter, + github.GitHubAPIImporter, + gitlab.GitLabAPIImporter, + npm.NpmImporter, + pypa.PyPaImporter, nginx.NginxImporter, + pysec.PyPIImporter, alpine_linux.AlpineImporter, - github.GitHubAPIImporter, - nvd.NVDImporter, openssl.OpensslImporter, redhat.RedhatImporter, - pysec.PyPIImporter, debian.DebianImporter, - gitlab.GitLabAPIImporter, postgresql.PostgreSQLImporter, - pypa.PyPaImporter, archlinux.ArchlinuxImporter, ubuntu.UbuntuImporter, debian_oval.DebianOvalImporter, - npm.NpmImporter, retiredotnet.RetireDotnetImporter, apache_httpd.ApacheHTTPDImporter, mozilla.MozillaImporter, @@ -65,6 +66,7 @@ ubuntu_usn.UbuntuUSNImporter, fireeye.FireyeImporter, apache_kafka.ApacheKafkaImporter, + oss_fuzz.OSSFuzzImporter, ] IMPORTERS_REGISTRY = {x.qualified_name: x for x in IMPORTERS_REGISTRY} diff --git a/vulnerabilities/importers/apache_httpd.py b/vulnerabilities/importers/apache_httpd.py index 6f120addd..0b5e4a425 100644 --- a/vulnerabilities/importers/apache_httpd.py +++ b/vulnerabilities/importers/apache_httpd.py @@ -9,15 +9,9 @@ import logging import urllib -from datetime import datetime -from typing import Iterable -from typing import List -from typing import Mapping -from typing import Optional import requests from bs4 import BeautifulSoup -from django.db.models.query import QuerySet from packageurl import PackageURL from univers.version_constraint import VersionConstraint from univers.version_range import ApacheVersionRange @@ -27,18 +21,9 @@ from vulnerabilities.importer import AffectedPackage from vulnerabilities.importer import Importer from vulnerabilities.importer import Reference -from vulnerabilities.importer import UnMergeablePackageError from vulnerabilities.importer import VulnerabilitySeverity -from vulnerabilities.improver import Improver -from vulnerabilities.improver import Inference -from vulnerabilities.models import Advisory -from vulnerabilities.package_managers import GitHubTagsAPI -from 
vulnerabilities.package_managers import VersionAPI from vulnerabilities.severity_systems import APACHE_HTTPD -from vulnerabilities.utils import AffectedPackage as LegacyAffectedPackage -from vulnerabilities.utils import get_affected_packages_by_patched_package -from vulnerabilities.utils import nearest_patched_package -from vulnerabilities.utils import resolve_version_range +from vulnerabilities.utils import get_item logger = logging.getLogger(__name__) @@ -56,16 +41,18 @@ def advisory_data(self): yield self.to_advisory(data) def to_advisory(self, data): - alias = data["CVE_data_meta"]["ID"] - descriptions = data["description"]["description_data"] + alias = get_item(data, "CVE_data_meta", "ID") + if not alias: + alias = get_item(data, "cveMetadata", "cveId") + descriptions = get_item(data, "description", "description_data") or [] description = None for desc in descriptions: - if desc["lang"] == "eng": + if desc.get("lang") == "eng": description = desc.get("value") break severities = [] - impacts = data.get("impact", []) + impacts = data.get("impact") or [] for impact in impacts: value = impact.get("other") if value: @@ -84,14 +71,14 @@ def to_advisory(self, data): ) versions_data = [] - for vendor in data["affects"]["vendor"]["vendor_data"]: - for products in vendor["product"]["product_data"]: - for version_data in products["version"]["version_data"]: + for vendor in get_item(data, "affects", "vendor", "vendor_data") or []: + for products in get_item(vendor, "product", "product_data") or []: + for version_data in get_item(products, "version", "version_data") or []: versions_data.append(version_data) fixed_versions = [] for timeline_object in data.get("timeline") or []: - timeline_value = timeline_object["value"] + timeline_value = timeline_object.get("value") if "release" in timeline_value: split_timeline_value = timeline_value.split(" ") if "never" in timeline_value: @@ -116,7 +103,7 @@ def to_advisory(self, data): return AdvisoryData( aliases=[alias], - 
summary=description, + summary=description or "", affected_packages=affected_packages, references=[reference], ) @@ -163,144 +150,3 @@ def fetch_links(url): continue links.append(urllib.parse.urljoin(url, link)) return links - - -IGNORE_TAGS = { - "AGB_BEFORE_AAA_CHANGES", - "APACHE_1_2b1", - "APACHE_1_2b10", - "APACHE_1_2b11", - "APACHE_1_2b2", - "APACHE_1_2b3", - "APACHE_1_2b4", - "APACHE_1_2b5", - "APACHE_1_2b6", - "APACHE_1_2b7", - "APACHE_1_2b8", - "APACHE_1_2b9", - "APACHE_1_3_PRE_NT", - "APACHE_1_3a1", - "APACHE_1_3b1", - "APACHE_1_3b2", - "APACHE_1_3b3", - "APACHE_1_3b5", - "APACHE_1_3b6", - "APACHE_1_3b7", - "APACHE_2_0_2001_02_09", - "APACHE_2_0_52_WROWE_RC1", - "APACHE_2_0_ALPHA", - "APACHE_2_0_ALPHA_2", - "APACHE_2_0_ALPHA_3", - "APACHE_2_0_ALPHA_4", - "APACHE_2_0_ALPHA_5", - "APACHE_2_0_ALPHA_6", - "APACHE_2_0_ALPHA_7", - "APACHE_2_0_ALPHA_8", - "APACHE_2_0_ALPHA_9", - "APACHE_2_0_BETA_CANDIDATE_1", - "APACHE_BIG_SYMBOL_RENAME_POST", - "APACHE_BIG_SYMBOL_RENAME_PRE", - "CHANGES", - "HTTPD_LDAP_1_0_0", - "INITIAL", - "MOD_SSL_2_8_3", - "PCRE_3_9", - "POST_APR_SPLIT", - "PRE_APR_CHANGES", - "STRIKER_2_0_51_RC1", - "STRIKER_2_0_51_RC2", - "STRIKER_2_1_0_RC1", - "WROWE_2_0_43_PRE1", - "apache-1_3-merge-1-post", - "apache-1_3-merge-1-pre", - "apache-1_3-merge-2-post", - "apache-1_3-merge-2-pre", - "apache-apr-merge-3", - "apache-doc-split-01", - "dg_last_1_2_doc_merge", - "djg-apache-nspr-07", - "djg_nspr_split", - "moving_to_httpd_module", - "mpm-3", - "mpm-merge-1", - "mpm-merge-2", - "post_ajp_proxy", - "pre_ajp_proxy", -} - - -class ApacheHTTPDImprover(Improver): - def __init__(self) -> None: - self.versions_fetcher_by_purl: Mapping[str, VersionAPI] = {} - self.vesions_by_purl = {} - - @property - def interesting_advisories(self) -> QuerySet: - return Advisory.objects.filter(created_by=ApacheHTTPDImporter.qualified_name) - - def get_package_versions( - self, package_url: PackageURL, until: Optional[datetime] = None - ) -> List[str]: - """ - Return a 
list of `valid_versions` for the `package_url` - """ - api_name = "apache/httpd" - versions_fetcher = GitHubTagsAPI() - return versions_fetcher.get_until(package_name=api_name, until=until).valid_versions - - def get_inferences(self, advisory_data: AdvisoryData) -> Iterable[Inference]: - """ - Yield Inferences for the given advisory data - """ - if not advisory_data.affected_packages: - return - try: - purl, affected_version_ranges, _ = AffectedPackage.merge( - advisory_data.affected_packages - ) - except UnMergeablePackageError: - logger.error(f"Cannot merge with different purls {advisory_data.affected_packages!r}") - return iter([]) - - pkg_type = purl.type - pkg_namespace = purl.namespace - pkg_name = purl.name - - if not self.vesions_by_purl.get(str(purl)): - valid_versions = self.get_package_versions( - package_url=purl, until=advisory_data.date_published - ) - self.vesions_by_purl[str(purl)] = valid_versions - - valid_versions = self.vesions_by_purl[str(purl)] - - for affected_version_range in affected_version_ranges: - aff_vers, unaff_vers = resolve_version_range( - affected_version_range=affected_version_range, - package_versions=valid_versions, - ignorable_versions=IGNORE_TAGS, - ) - affected_purls = [ - PackageURL(type=pkg_type, namespace=pkg_namespace, name=pkg_name, version=version) - for version in aff_vers - ] - - unaffected_purls = [ - PackageURL(type=pkg_type, namespace=pkg_namespace, name=pkg_name, version=version) - for version in unaff_vers - ] - - affected_packages: List[LegacyAffectedPackage] = nearest_patched_package( - vulnerable_packages=affected_purls, resolved_packages=unaffected_purls - ) - - for ( - fixed_package, - affected_packages, - ) in get_affected_packages_by_patched_package(affected_packages).items(): - yield Inference.from_advisory_data( - advisory_data, - confidence=100, # We are getting all valid versions to get this inference - affected_purls=affected_packages, - fixed_purl=fixed_package, - ) diff --git 
a/vulnerabilities/importers/apache_kafka.py b/vulnerabilities/importers/apache_kafka.py index 96b55748f..1195d3d28 100644 --- a/vulnerabilities/importers/apache_kafka.py +++ b/vulnerabilities/importers/apache_kafka.py @@ -8,6 +8,8 @@ # +import logging + import pytz import requests from bs4 import BeautifulSoup @@ -19,6 +21,8 @@ from vulnerabilities.importer import Importer from vulnerabilities.importer import Reference +logger = logging.getLogger(__name__) + # The entries below with `"action": "omit"` have no useful/reportable fixed or affected version data. # See https://kafka.apache.org/cve-list affected_version_range_mapping = { @@ -135,13 +139,17 @@ def to_advisory(self, advisory_page): fixed_versions_clean = [v.strip() for v in fixed_versions.split(",")] fixed_versions_clean = [v for v in fixed_versions if v] - # This throws a KeyError if the opening h2 tag `id` data changes or is not in the - # hard-coded affected_version_range_mapping dictionary. - cve_version_mapping = affected_version_range_mapping[cve_id] - if cve_version_mapping["action"] == "include": - # These 2 variables (not used elsewhere) trigger the KeyError for changed/missing data. - check_affected_versions_key = cve_version_mapping[affected_versions] - check_fixed_versions_key = cve_version_mapping[fixed_versions] + cve_version_mapping = affected_version_range_mapping.get(cve_id) + if not cve_version_mapping: + logger.error(f"Data for {cve_id} not found in mapping. Skipping.") + if cve_version_mapping and cve_version_mapping.get("action") == "include": + check_affected_versions_key = cve_version_mapping.get(affected_versions) or [] + check_fixed_versions_key = cve_version_mapping.get(fixed_versions) or [] + + if not check_affected_versions_key: + logger.error(f"Affected versions for {cve_id} not found in mapping. Skipping.") + if not check_fixed_versions_key: + logger.error(f"Fixed versions for {cve_id} not found in mapping. 
Skipping.") references = [ Reference( @@ -159,18 +167,22 @@ def to_advisory(self, advisory_page): ] affected_packages = [] - affected_package = AffectedPackage( - package=PackageURL( - name="kafka", - type="apache", - ), - affected_version_range=cve_version_mapping["affected_version_range"], - ) - affected_packages.append(affected_package) + affected_version_range = cve_version_mapping.get("affected_version_range") + if cve_version_mapping.get("affected_version_range"): + affected_package = AffectedPackage( + package=PackageURL( + name="kafka", + type="apache", + ), + affected_version_range=affected_version_range, + ) + affected_packages.append(affected_package) - date_published = parse(cve_version_mapping["Issue announced"]).replace( - tzinfo=pytz.UTC - ) + date_published = None + issue_announced = cve_version_mapping.get("Issue announced") + + if issue_announced: + date_published = parse(issue_announced).replace(tzinfo=pytz.UTC) advisories.append( AdvisoryData( diff --git a/vulnerabilities/importers/apache_tomcat.py b/vulnerabilities/importers/apache_tomcat.py index 3d754d6df..04270059a 100644 --- a/vulnerabilities/importers/apache_tomcat.py +++ b/vulnerabilities/importers/apache_tomcat.py @@ -138,7 +138,7 @@ def fetch_advisory_links(self, url): for tag in soup.find_all("a"): link = tag.get("href") - if "security-" in link and any(char.isdigit() for char in link): + if link and "security-" in link and any(char.isdigit() for char in link): yield urllib.parse.urljoin(url, link) def advisory_data(self): diff --git a/vulnerabilities/importers/debian.py b/vulnerabilities/importers/debian.py index 5191639db..113550673 100644 --- a/vulnerabilities/importers/debian.py +++ b/vulnerabilities/importers/debian.py @@ -14,7 +14,6 @@ from typing import Mapping import requests -from django.db.models.query import QuerySet from packageurl import PackageURL from univers.version_range import DebianVersionRange from univers.versions import DebianVersion @@ -23,16 +22,8 @@ from 
vulnerabilities.importer import AffectedPackage from vulnerabilities.importer import Importer from vulnerabilities.importer import Reference -from vulnerabilities.importer import UnMergeablePackageError -from vulnerabilities.improver import MAX_CONFIDENCE -from vulnerabilities.improver import Improver -from vulnerabilities.improver import Inference -from vulnerabilities.models import Advisory -from vulnerabilities.utils import AffectedPackage as LegacyAffectedPackage from vulnerabilities.utils import dedupe -from vulnerabilities.utils import get_affected_packages_by_patched_package from vulnerabilities.utils import get_item -from vulnerabilities.utils import nearest_patched_package logger = logging.getLogger(__name__) @@ -164,74 +155,3 @@ def parse(self, pkg_name: str, records: Mapping[str, Any]) -> Iterable[AdvisoryD affected_packages=affected_packages, references=references, ) - - -class DebianBasicImprover(Improver): - @property - def interesting_advisories(self) -> QuerySet: - return Advisory.objects.filter(created_by=DebianImporter.qualified_name) - - def get_inferences(self, advisory_data: AdvisoryData) -> Iterable[Inference]: - """ - Yield Inferences for the given advisory data - """ - if not advisory_data.affected_packages: - return - try: - purl, affected_version_ranges, fixed_versions = AffectedPackage.merge( - advisory_data.affected_packages - ) - except UnMergeablePackageError: - logger.error(f"Cannot merge with different purls {advisory_data.affected_packages!r}") - return - - pkg_type = purl.type - pkg_namespace = purl.namespace - pkg_name = purl.name - pkg_qualifiers = purl.qualifiers - fixed_purls = [ - PackageURL( - type=pkg_type, - namespace=pkg_namespace, - name=pkg_name, - version=str(version), - qualifiers=pkg_qualifiers, - ) - for version in fixed_versions - ] - if not affected_version_ranges: - for fixed_purl in fixed_purls: - yield Inference.from_advisory_data( - advisory_data, # We are getting all valid versions to get this inference - 
confidence=MAX_CONFIDENCE, - affected_purls=[], - fixed_purl=fixed_purl, - ) - else: - aff_versions = set() - for affected_version_range in affected_version_ranges: - for constraint in affected_version_range.constraints: - aff_versions.add(constraint.version.string) - affected_purls = [ - PackageURL( - type=pkg_type, - namespace=pkg_namespace, - name=pkg_name, - version=version, - qualifiers=pkg_qualifiers, - ) - for version in aff_versions - ] - affected_packages: List[LegacyAffectedPackage] = nearest_patched_package( - vulnerable_packages=affected_purls, resolved_packages=fixed_purls - ) - - for (fixed_package, affected_packages,) in get_affected_packages_by_patched_package( - affected_packages=affected_packages - ).items(): - yield Inference.from_advisory_data( - advisory_data, - confidence=MAX_CONFIDENCE, # We are getting all valid versions to get this inference - affected_purls=affected_packages, - fixed_purl=fixed_package, - ) diff --git a/vulnerabilities/importers/elixir_security.py b/vulnerabilities/importers/elixir_security.py index a1d0a33cf..ff9deed70 100644 --- a/vulnerabilities/importers/elixir_security.py +++ b/vulnerabilities/importers/elixir_security.py @@ -29,7 +29,7 @@ class ElixirSecurityImporter(Importer): def advisory_data(self) -> Set[AdvisoryData]: try: - self.clone(self.repo_url) + self.clone(repo_url=self.repo_url) path = Path(self.vcs_response.dest_dir) vuln = path / "packages" for file in vuln.glob("**/*.yml"): diff --git a/vulnerabilities/importers/fireeye.py b/vulnerabilities/importers/fireeye.py index dc9fc129f..940154bb5 100644 --- a/vulnerabilities/importers/fireeye.py +++ b/vulnerabilities/importers/fireeye.py @@ -13,7 +13,7 @@ from typing import List from vulnerabilities.importer import AdvisoryData -from vulnerabilities.importer import GitImporter +from vulnerabilities.importer import Importer from vulnerabilities.importer import Reference from vulnerabilities.utils import build_description from vulnerabilities.utils import dedupe 
@@ -21,7 +21,7 @@ logger = logging.getLogger(__name__) -class FireyeImporter(GitImporter): +class FireyeImporter(Importer): spdx_license_expression = "CC-BY-SA-4.0 AND MIT" license_url = "https://github.com/mandiant/Vulnerability-Disclosures/blob/master/README.md" notice = """ @@ -30,23 +30,25 @@ class FireyeImporter(GitImporter): 1. CC BY-SA 4.0 - For CVE related information not including source code (such as PoCs) 2. MIT - For source code contained within provided CVE information """ - - def __init__(self): - super().__init__(repo_url="git+https://github.com/mandiant/Vulnerability-Disclosures") + repo_url = "git+https://github.com/mandiant/Vulnerability-Disclosures" def advisory_data(self) -> Iterable[AdvisoryData]: - self.clone() - files = filter( - lambda p: p.suffix in [".md", ".MD"], Path(self.vcs_response.dest_dir).glob("**/*") - ) - for file in files: - if Path(file).stem == "README": - continue - try: - with open(file) as f: - yield parse_advisory_data(f.read()) - except UnicodeError: - logger.error(f"Invalid file {file}") + try: + self.clone(repo_url=self.repo_url) + files = filter( + lambda p: p.suffix in [".md", ".MD"], Path(self.vcs_response.dest_dir).glob("**/*") + ) + for file in files: + if Path(file).stem == "README": + continue + try: + with open(file) as f: + yield parse_advisory_data(f.read()) + except UnicodeError: + logger.error(f"Invalid file {file}") + finally: + if self.vcs_response: + self.vcs_response.delete() def parse_advisory_data(raw_data) -> AdvisoryData: diff --git a/vulnerabilities/importers/github.py b/vulnerabilities/importers/github.py index ad3643ba6..8ef1b3a9c 100644 --- a/vulnerabilities/importers/github.py +++ b/vulnerabilities/importers/github.py @@ -8,14 +8,11 @@ # import logging -from datetime import datetime from typing import Iterable -from typing import List -from typing import Mapping from typing import Optional +from cwe2.database import Database from dateutil import parser as dateparser -from django.db.models.query 
import QuerySet from packageurl import PackageURL from univers.version_range import RANGE_CLASS_BY_SCHEMES from univers.version_range import build_range_from_github_advisory_constraint @@ -26,88 +23,13 @@ from vulnerabilities.importer import AffectedPackage from vulnerabilities.importer import Importer from vulnerabilities.importer import Reference -from vulnerabilities.importer import UnMergeablePackageError from vulnerabilities.importer import VulnerabilitySeverity -from vulnerabilities.improver import Improver -from vulnerabilities.improver import Inference -from vulnerabilities.models import Advisory -from vulnerabilities.package_managers import VERSION_API_CLASSES_BY_PACKAGE_TYPE -from vulnerabilities.package_managers import GoproxyVersionAPI -from vulnerabilities.package_managers import VersionAPI -from vulnerabilities.package_managers import get_api_package_name -from vulnerabilities.utils import AffectedPackage as LegacyAffectedPackage from vulnerabilities.utils import dedupe -from vulnerabilities.utils import get_affected_packages_by_patched_package +from vulnerabilities.utils import get_cwe_id from vulnerabilities.utils import get_item -from vulnerabilities.utils import nearest_patched_package -from vulnerabilities.utils import resolve_version_range logger = logging.getLogger(__name__) -WEIRD_IGNORABLE_VERSIONS = frozenset( - [ - "0.1-bulbasaur", - "0.1-charmander", - "0.3m1", - "0.3m2", - "0.3m3", - "0.3m4", - "0.3m5", - "0.4m1", - "0.4m2", - "0.4m3", - "0.4m4", - "0.4m5", - "0.5m1", - "0.5m2", - "0.5m3", - "0.5m4", - "0.5m5", - "0.6m1", - "0.6m2", - "0.6m3", - "0.6m4", - "0.6m5", - "0.6m6", - "0.7.10p1", - "0.7.11p1", - "0.7.11p2", - "0.7.11p3", - "0.8.1p1", - "0.8.3p1", - "0.8.4p1", - "0.8.4p2", - "0.8.6p1", - "0.8.7p1", - "0.9-doduo", - "0.9-eevee", - "0.9-fearow", - "0.9-gyarados", - "0.9-horsea", - "0.9-ivysaur", - "2013-01-21T20:33:09+0100", - "2013-01-23T17:11:52+0100", - "2013-02-01T20:50:46+0100", - "2013-02-02T19:59:03+0100", - 
"2013-02-02T20:23:17+0100", - "2013-02-08T17:40:57+0000", - "2013-03-27T16:32:26+0100", - "2013-05-09T12:47:53+0200", - "2013-05-10T17:55:56+0200", - "2013-05-14T20:16:05+0200", - "2013-06-01T10:32:51+0200", - "2013-07-19T09:11:08+0000", - "2013-08-12T21:48:56+0200", - "2013-09-11T19-27-10", - "2013-12-23T17-51-15", - "2014-01-12T15-52-10", - "2.0.1rc2-git", - "3.0.0b3-", - "3.0b6dev-r41684", - "-class.-jw.util.version.Version-", - ] -) - PACKAGE_TYPE_BY_GITHUB_ECOSYSTEM = { "MAVEN": "maven", "NUGET": "nuget", @@ -142,6 +64,11 @@ url } severity + cwes(first: 10){ + nodes { + cweId + } + } publishedAt } firstPatchedVersion{ @@ -216,12 +143,6 @@ def get_purl(pkg_type: str, github_name: str) -> Optional[PackageURL]: logger.error(f"get_purl: Unknown package type {pkg_type}") -class InvalidVersionRange(Exception): - """ - Raises exception when the version range is invalid - """ - - def process_response(resp: dict, package_type: str) -> Iterable[AdvisoryData]: """ Yield `AdvisoryData` by taking `resp` and `ecosystem` as input @@ -268,7 +189,7 @@ def process_response(resp: dict, package_type: str) -> Iterable[AdvisoryData]: affected_range = build_range_from_github_advisory_constraint( package_type, affected_range ) - except InvalidVersionRange as e: + except Exception as e: logger.error(f"Could not parse affected range {affected_range!r} {e!r}") affected_range = None if fixed_version: @@ -312,99 +233,34 @@ def process_response(resp: dict, package_type: str) -> Iterable[AdvisoryData]: else: logger.error(f"Unknown identifier type {identifier_type!r} and value {value!r}") + weaknesses = get_cwes_from_github_advisory(advisory) + yield AdvisoryData( aliases=sorted(dedupe(aliases)), summary=summary, references=references, affected_packages=affected_packages, date_published=date_published, + weaknesses=weaknesses, ) -class GitHubBasicImprover(Improver): - def __init__(self) -> None: - self.versions_fetcher_by_purl: Mapping[str, VersionAPI] = {} - - @property - def 
interesting_advisories(self) -> QuerySet: - return Advisory.objects.filter(created_by=GitHubAPIImporter.qualified_name) - - def get_package_versions( - self, package_url: PackageURL, until: Optional[datetime] = None - ) -> List[str]: - """ - Return a list of `valid_versions` for the `package_url` - """ - api_name = get_api_package_name(package_url) - if not api_name: - logger.error(f"Could not get versions for {package_url!r}") - return [] - versions_fetcher = self.versions_fetcher_by_purl.get(package_url) - if not versions_fetcher: - versions_fetcher: VersionAPI = VERSION_API_CLASSES_BY_PACKAGE_TYPE[package_url.type] - self.versions_fetcher_by_purl[package_url] = versions_fetcher() - - versions_fetcher = self.versions_fetcher_by_purl[package_url] - - self.versions_fetcher_by_purl[package_url] = versions_fetcher - return versions_fetcher.get_until(package_name=api_name, until=until).valid_versions - - def get_inferences(self, advisory_data: AdvisoryData) -> Iterable[Inference]: - """ - Yield Inferences for the given advisory data - """ - if not advisory_data.affected_packages: - return - try: - purl, affected_version_ranges, _ = AffectedPackage.merge( - advisory_data.affected_packages - ) - except UnMergeablePackageError: - logger.error(f"Cannot merge with different purls {advisory_data.affected_packages!r}") - return iter([]) - - pkg_type = purl.type - pkg_namespace = purl.namespace - pkg_name = purl.name - if purl.type == "golang": - # Problem with the Golang and Go that they provide full path - # FIXME: We need to get the PURL subpath for Go module - versions_fetcher = self.versions_fetcher_by_purl.get(purl) - if not versions_fetcher: - versions_fetcher = GoproxyVersionAPI() - self.versions_fetcher_by_purl[purl] = versions_fetcher - pkg_name = versions_fetcher.module_name_by_package_name.get(pkg_name, pkg_name) - - valid_versions = self.get_package_versions( - package_url=purl, until=advisory_data.date_published - ) - for affected_version_range in 
affected_version_ranges: - aff_vers, unaff_vers = resolve_version_range( - affected_version_range=affected_version_range, - package_versions=valid_versions, - ignorable_versions=WEIRD_IGNORABLE_VERSIONS, - ) - affected_purls = [ - PackageURL(type=pkg_type, namespace=pkg_namespace, name=pkg_name, version=version) - for version in aff_vers - ] - - unaffected_purls = [ - PackageURL(type=pkg_type, namespace=pkg_namespace, name=pkg_name, version=version) - for version in unaff_vers - ] - - affected_packages: List[LegacyAffectedPackage] = nearest_patched_package( - vulnerable_packages=affected_purls, resolved_packages=unaffected_purls - ) - - for ( - fixed_package, - affected_packages, - ) in get_affected_packages_by_patched_package(affected_packages).items(): - yield Inference.from_advisory_data( - advisory_data, - confidence=100, # We are getting all valid versions to get this inference - affected_purls=affected_packages, - fixed_purl=fixed_package, - ) +def get_cwes_from_github_advisory(advisory) -> [int]: + """ + Return the cwe-id list from advisory ex: [ 522 ] + by extracting the cwe_list from advisory ex: [{'cweId': 'CWE-522'}] + then remove the CWE- from string and convert it to integer 522 and Check if the CWE in CWE-Database + """ + weaknesses = [] + db = Database() + cwe_list = get_item(advisory, "cwes", "nodes") or [] + for cwe_item in cwe_list: + cwe_string = get_item(cwe_item, "cweId") + if cwe_string: + cwe_id = get_cwe_id(cwe_string) + try: + db.get(cwe_id) + weaknesses.append(cwe_id) + except Exception: + logger.error("Invalid CWE id") + return weaknesses diff --git a/vulnerabilities/importers/gitlab.py b/vulnerabilities/importers/gitlab.py index afdb3f865..561b1a7d9 100644 --- a/vulnerabilities/importers/gitlab.py +++ b/vulnerabilities/importers/gitlab.py @@ -9,17 +9,14 @@ import logging import traceback -from datetime import datetime from pathlib import Path from typing import Iterable from typing import List -from typing import Mapping from typing 
import Optional import pytz import saneyaml from dateutil import parser as dateparser -from django.db.models.query import QuerySet from packageurl import PackageURL from univers.version_range import RANGE_CLASS_BY_SCHEMES from univers.version_range import VersionRange @@ -28,27 +25,15 @@ from vulnerabilities.importer import AdvisoryData from vulnerabilities.importer import AffectedPackage -from vulnerabilities.importer import GitImporter +from vulnerabilities.importer import Importer from vulnerabilities.importer import Reference -from vulnerabilities.importer import UnMergeablePackageError -from vulnerabilities.improver import Improver -from vulnerabilities.improver import Inference -from vulnerabilities.models import Advisory -from vulnerabilities.package_managers import VERSION_API_CLASSES_BY_PACKAGE_TYPE -from vulnerabilities.package_managers import GoproxyVersionAPI -from vulnerabilities.package_managers import VersionAPI -from vulnerabilities.package_managers import get_api_package_name -from vulnerabilities.utils import AffectedPackage as LegacyAffectedPackage from vulnerabilities.utils import build_description -from vulnerabilities.utils import get_affected_packages_by_patched_package -from vulnerabilities.utils import nearest_patched_package -from vulnerabilities.utils import resolve_version_range +from vulnerabilities.utils import get_cwe_id logger = logging.getLogger(__name__) - PURL_TYPE_BY_GITLAB_SCHEME = { - # "conan": "conan", + "conan": "conan", "gem": "gem", # Entering issue to parse go package names https://github.com/nexB/vulnerablecode/issues/742 # "go": "golang", @@ -59,20 +44,17 @@ "pypi": "pypi", } - GITLAB_SCHEME_BY_PURL_TYPE = {v: k for k, v in PURL_TYPE_BY_GITLAB_SCHEME.items()} -class GitLabAPIImporter(GitImporter): +class GitLabAPIImporter(Importer): spdx_license_expression = "MIT" license_url = "https://gitlab.com/gitlab-org/advisories-community/-/blob/main/LICENSE" + repo_url = "git+https://gitlab.com/gitlab-org/advisories-community/" 
- def __init__(self): - super().__init__(repo_url="git+https://gitlab.com/gitlab-org/advisories-community/") - - def advisory_data(self, _keep_clone=True) -> Iterable[AdvisoryData]: + def advisory_data(self, _keep_clone=False) -> Iterable[AdvisoryData]: try: - self.clone() + self.clone(repo_url=self.repo_url) base_path = Path(self.vcs_response.dest_dir) for file_path in base_path.glob("**/*.yml"): @@ -203,6 +185,10 @@ def parse_gitlab_advisory(file): summary = build_description(gitlab_advisory.get("title"), gitlab_advisory.get("description")) urls = gitlab_advisory.get("urls") references = [Reference.from_url(u) for u in urls] + + cwe_ids = gitlab_advisory.get("cwe_ids") or [] + cwe_list = list(map(get_cwe_id, cwe_ids)) + date_published = dateparser.parse(gitlab_advisory.get("pubdate")) date_published = date_published.replace(tzinfo=pytz.UTC) package_slug = gitlab_advisory.get("package_slug") @@ -218,7 +204,7 @@ def parse_gitlab_advisory(file): affected_version_range = None fixed_versions = gitlab_advisory.get("fixed_versions") or [] affected_range = gitlab_advisory.get("affected_range") - gitlab_native_schemes = set(["pypi", "gem", "npm", "go", "packagist"]) + gitlab_native_schemes = set(["pypi", "gem", "npm", "go", "packagist", "conan"]) vrc: VersionRange = RANGE_CLASS_BY_SCHEMES[purl.type] gitlab_scheme = GITLAB_SCHEME_BY_PURL_TYPE[purl.type] try: @@ -268,101 +254,5 @@ def parse_gitlab_advisory(file): references=references, date_published=date_published, affected_packages=affected_packages, + weaknesses=cwe_list, ) - - -class GitLabBasicImprover(Improver): - """ - Get the nearest fixed_version and then resolve the version range with the help of all valid versions. - Generate inference between all the affected packages and the fixed_version that fixes all those affected packages. - - In case of gitlab advisory data we get a list of fixed_versions and a affected_version_range. - Since we can not determine which package fixes which range. 
- """ - - def __init__(self) -> None: - self.versions_fetcher_by_purl: Mapping[str, VersionAPI] = {} - - @property - def interesting_advisories(self) -> QuerySet: - return Advisory.objects.filter(created_by=GitLabAPIImporter.qualified_name) - - def get_package_versions( - self, package_url: PackageURL, until: Optional[datetime] = None - ) -> List[str]: - """ - Return a list of `valid_versions` for the `package_url` - """ - api_name = get_api_package_name(purl=package_url) - if not api_name: - logger.error(f"Could not get versions for {package_url!r}") - return [] - versions_fetcher = self.versions_fetcher_by_purl.get(package_url) - if not versions_fetcher: - versions_fetcher: VersionAPI = VERSION_API_CLASSES_BY_PACKAGE_TYPE[package_url.type] - self.versions_fetcher_by_purl[package_url] = versions_fetcher() - - versions_fetcher = self.versions_fetcher_by_purl[package_url] - - self.versions_fetcher_by_purl[package_url] = versions_fetcher - return versions_fetcher.get_until(package_name=api_name, until=until).valid_versions - - def get_inferences(self, advisory_data: AdvisoryData) -> Iterable[Inference]: - """ - Yield Inferences for the given advisory data - """ - if not advisory_data.affected_packages: - return iter([]) - try: - purl, affected_version_ranges, _ = AffectedPackage.merge( - advisory_data.affected_packages - ) - except UnMergeablePackageError: - logger.error(f"Cannot merge with different purls {advisory_data.affected_packages!r}") - return iter([]) - - pkg_type = purl.type - pkg_namespace = purl.namespace - pkg_name = purl.name - if purl.type == "golang": - # Problem with the Golang and Go that they provide full path - # FIXME: We need to get the PURL subpath for Go module - versions_fetcher = self.versions_fetcher_by_purl.get(purl) - if not versions_fetcher: - versions_fetcher = GoproxyVersionAPI() - self.versions_fetcher_by_purl[purl] = versions_fetcher - pkg_name = versions_fetcher.module_name_by_package_name.get(pkg_name, pkg_name) - - valid_versions 
= self.get_package_versions( - package_url=purl, until=advisory_data.date_published - ) - for affected_version_range in affected_version_ranges: - aff_vers, unaff_vers = resolve_version_range( - affected_version_range=affected_version_range, - package_versions=valid_versions, - ignorable_versions=[], - ) - affected_purls = [ - PackageURL(type=pkg_type, namespace=pkg_namespace, name=pkg_name, version=version) - for version in aff_vers - ] - - unaffected_purls = [ - PackageURL(type=pkg_type, namespace=pkg_namespace, name=pkg_name, version=version) - for version in unaff_vers - ] - - affected_packages: List[LegacyAffectedPackage] = nearest_patched_package( - vulnerable_packages=affected_purls, resolved_packages=unaffected_purls - ) - - for ( - fixed_package, - affected_packages, - ) in get_affected_packages_by_patched_package(affected_packages).items(): - yield Inference.from_advisory_data( - advisory_data, # We are getting all valid versions to get this inference - confidence=100, - affected_purls=affected_packages, - fixed_purl=fixed_package, - ) diff --git a/vulnerabilities/importers/istio.py b/vulnerabilities/importers/istio.py index 951fa5297..d37c6083e 100644 --- a/vulnerabilities/importers/istio.py +++ b/vulnerabilities/importers/istio.py @@ -53,17 +53,21 @@ class IstioImporter(Importer): repo_url = "git+https://github.com/istio/istio.io/" def advisory_data(self) -> Set[AdvisoryData]: - self.clone(self.repo_url) - path = Path(self.vcs_response.dest_dir) - vuln = path / "content/en/news/security/" - for file in vuln.glob("**/*.md"): - # Istio website has files with name starting with underscore, these contain metadata - # required for rendering the website. We're not interested in these. 
- # See also https://github.com/nexB/vulnerablecode/issues/563 - file = str(file) - if file.endswith("_index.md"): - continue - yield from self.process_file(file) + try: + self.clone(repo_url=self.repo_url) + path = Path(self.vcs_response.dest_dir) + vuln = path / "content/en/news/security/" + for file in vuln.glob("**/*.md"): + # Istio website has files with name starting with underscore, these contain metadata + # required for rendering the website. We're not interested in these. + # See also https://github.com/nexB/vulnerablecode/issues/563 + file = str(file) + if file.endswith("_index.md"): + continue + yield from self.process_file(file) + finally: + if self.vcs_response: + self.vcs_response.delete() def process_file(self, path): @@ -155,70 +159,3 @@ def get_data_from_md(self, path): with open(path) as f: front_matter, _ = split_markdown_front_matter(f.read()) return saneyaml.load(front_matter) - - -class IstioImprover(Improver): - def __init__(self) -> None: - self.versions_fetcher_by_purl: Mapping[str, VersionAPI] = {} - self.vesions_by_purl = {} - - @property - def interesting_advisories(self) -> QuerySet: - return Advisory.objects.filter(created_by=IstioImporter.qualified_name) - - def get_package_versions( - self, package_url: PackageURL, until: Optional[datetime] = None - ) -> List[str]: - """ - Return a list of `valid_versions` for the `package_url` - """ - api_name = "istio/istio" - versions_fetcher = GitHubTagsAPI() - return versions_fetcher.get_until(package_name=api_name, until=until).valid_versions - - def get_inferences(self, advisory_data: AdvisoryData) -> Iterable[Inference]: - """ - Yield Inferences for the given advisory data - """ - if not advisory_data.affected_packages: - return - for affected_package in advisory_data.affected_packages: - purl = affected_package.package - affected_version_range = affected_package.affected_version_range - pkg_type = purl.type - pkg_namespace = purl.namespace - pkg_name = purl.name - if not 
self.vesions_by_purl.get("istio/istio"): - valid_versions = self.get_package_versions( - package_url=purl, until=advisory_data.date_published - ) - self.vesions_by_purl["istio/istio"] = valid_versions - valid_versions = self.vesions_by_purl["istio/istio"] - aff_vers, unaff_vers = resolve_version_range( - affected_version_range=affected_version_range, - package_versions=valid_versions, - ) - affected_purls = [ - PackageURL(type=pkg_type, namespace=pkg_namespace, name=pkg_name, version=version) - for version in aff_vers - ] - - unaffected_purls = [ - PackageURL(type=pkg_type, namespace=pkg_namespace, name=pkg_name, version=version) - for version in unaff_vers - ] - - affected_packages: List[LegacyAffectedPackage] = nearest_patched_package( - vulnerable_packages=affected_purls, resolved_packages=unaffected_purls - ) - - for ( - fixed_package, - affected_packages, - ) in get_affected_packages_by_patched_package(affected_packages).items(): - yield Inference.from_advisory_data( - advisory_data, - confidence=100, # We are getting all valid versions to get this inference - affected_purls=affected_packages, - fixed_purl=fixed_package, - ) diff --git a/vulnerabilities/importers/kaybee.py b/vulnerabilities/importers/kaybee.py index 7464a999f..1b908e4b5 100644 --- a/vulnerabilities/importers/kaybee.py +++ b/vulnerabilities/importers/kaybee.py @@ -10,13 +10,13 @@ from packageurl import PackageURL from vulnerabilities.importer import AdvisoryData -from vulnerabilities.importer import GitImporter +from vulnerabilities.importer import Importer from vulnerabilities.importer import Reference from vulnerabilities.utils import load_yaml from vulnerabilities.utils import nearest_patched_package -class KaybeeImporter(GitImporter): +class KaybeeImporter(Importer): def __enter__(self): super(KaybeeImporter, self).__enter__() self._added_files, self._updated_files = self.file_changes( diff --git a/vulnerabilities/importers/mozilla.py b/vulnerabilities/importers/mozilla.py index 
1cdaac357..3c4324bdf 100644 --- a/vulnerabilities/importers/mozilla.py +++ b/vulnerabilities/importers/mozilla.py @@ -39,7 +39,7 @@ class MozillaImporter(Importer): def advisory_data(self) -> Iterable[AdvisoryData]: try: - self.clone(self.repo_url) + self.clone(repo_url=self.repo_url) path = Path(self.vcs_response.dest_dir) vuln = path / "announce" diff --git a/vulnerabilities/importers/nginx.py b/vulnerabilities/importers/nginx.py index 1320fc895..918f29f20 100644 --- a/vulnerabilities/importers/nginx.py +++ b/vulnerabilities/importers/nginx.py @@ -23,15 +23,8 @@ from vulnerabilities.importer import AffectedPackage from vulnerabilities.importer import Importer from vulnerabilities.importer import Reference -from vulnerabilities.importer import UnMergeablePackageError from vulnerabilities.importer import VulnerabilitySeverity -from vulnerabilities.improver import Improver -from vulnerabilities.improver import Inference -from vulnerabilities.models import Advisory -from vulnerabilities.package_managers import GitHubTagsAPI -from vulnerabilities.package_managers import PackageVersion from vulnerabilities.severity_systems import GENERIC -from vulnerabilities.utils import evolve_purl logger = logging.getLogger(__name__) @@ -223,122 +216,3 @@ def build_severity(severity): severity = severity.strip() if severity: return VulnerabilitySeverity(system=GENERIC, value=severity) - - -class NginxBasicImprover(Improver): - """ - Improve Nginx data by fetching the its GitHub repo versions and resolving - the vulnerable ranges. 
- """ - - @property - def interesting_advisories(self) -> QuerySet: - return Advisory.objects.filter(created_by=NginxImporter.qualified_name) - - def get_inferences(self, advisory_data: AdvisoryData) -> Iterable[Inference]: - all_versions = list(self.fetch_nginx_version_from_git_tags()) - yield from self.get_inferences_from_versions( - advisory_data=advisory_data, all_versions=all_versions - ) - - def get_inferences_from_versions( - self, advisory_data: AdvisoryData, all_versions: List[PackageVersion] - ) -> Iterable[Inference]: - """ - Yield inferences given an ``advisory_data`` and a ``all_versions`` of - PackageVersion. - """ - - try: - purl, affected_version_ranges, fixed_versions = AffectedPackage.merge( - advisory_data.affected_packages - ) - except UnMergeablePackageError: - logger.error( - f"NginxBasicImprover: Cannot merge with different purls: " - f"{advisory_data.affected_packages!r}" - ) - return iter([]) - - affected_purls = [] - for affected_version_range in affected_version_ranges: - for package_version in all_versions: - # FIXME: we should reference an NginxVersion tbd in univers - version = NginxVersion(package_version.value) - if is_vulnerable( - version=version, - affected_version_range=affected_version_range, - fixed_versions=fixed_versions, - ): - new_purl = evolve_purl(purl=purl, version=str(version)) - affected_purls.append(new_purl) - - # TODO: This also yields with a lower fixed version, maybe we should - # only yield fixes that are upgrades ? - for fixed_version in fixed_versions: - fixed_purl = evolve_purl(purl=purl, version=str(fixed_version)) - - yield Inference.from_advisory_data( - advisory_data, - # TODO: is 90 a correct confidence?? - confidence=90, - affected_purls=affected_purls, - fixed_purl=fixed_purl, - ) - - def fetch_nginx_version_from_git_tags(self): - """ - Yield all nginx PackageVersion from its git tags. 
- """ - nginx_versions = GitHubTagsAPI().fetch("nginx/nginx") - for version in nginx_versions: - cleaned = clean_nginx_git_tag(version.value) - yield PackageVersion(value=cleaned, release_date=version.release_date) - - -def clean_nginx_git_tag(tag): - """ - Return a cleaned ``version`` string from an nginx git tag. - - Nginx tags git release as in `release-1.2.3` - This removes the the `release-` prefix. - - For example: - >>> clean_nginx_git_tag("release-1.2.3") == "1.2.3" - True - >>> clean_nginx_git_tag("1.2.3") == "1.2.3" - True - """ - if tag.startswith("release-"): - _, _, tag = tag.partition("release-") - return tag - - -def is_vulnerable(version, affected_version_range, fixed_versions): - """ - Return True if the ``version`` Version for nginx is vulnerable according to - the nginx approach. - - A ``version`` is vulnerable as explained by @mdounin - in https://marc.info/?l=nginx&m=164070162912710&w=2 : - - "Note that it is generally trivial to find out if a version is - vulnerable or not from the information about a vulnerability, - without any knowledge about nginx branches. That is: - - - Check if the version is in "Vulnerable" range. If it's not, the - version is not vulnerable. - - - If it is, check if the branch is explicitly listed in the "Not - vulnerable". If it's not, the version is vulnerable. If it - is, check the minor number: if it's greater or equal to the - version listed as not vulnerable, the version is not vulnerable, - else the version is vulnerable." 
- - """ - if version in NginxVersionRange.from_string(affected_version_range.to_string()): - for fixed_version in fixed_versions: - if version.value.minor == fixed_version.value.minor and version >= fixed_version: - return False - return True - return False diff --git a/vulnerabilities/importers/npm.py b/vulnerabilities/importers/npm.py index 4b6df9055..e1112158c 100644 --- a/vulnerabilities/importers/npm.py +++ b/vulnerabilities/importers/npm.py @@ -36,7 +36,7 @@ class NpmImporter(Importer): def advisory_data(self) -> Iterable[AdvisoryData]: try: - self.clone(self.repo_url) + self.clone(repo_url=self.repo_url) path = Path(self.vcs_response.dest_dir) vuln = path / "vuln" @@ -52,7 +52,9 @@ def to_advisory_data(self, file: Path) -> List[AdvisoryData]: id = data.get("id") description = data.get("overview") or "" summary = data.get("title") or "" - date_published = parse(data.get("created_at")).replace(tzinfo=pytz.UTC) + date_published = None + if isinstance(data.get("created_at"), str): + date_published = parse(data.get("created_at")).replace(tzinfo=pytz.UTC) references = [] cvss_vector = data.get("cvss_vector") cvss_score = data.get("cvss_score") diff --git a/vulnerabilities/importers/oss_fuzz.py b/vulnerabilities/importers/oss_fuzz.py new file mode 100644 index 000000000..6e4d3fef4 --- /dev/null +++ b/vulnerabilities/importers/oss_fuzz.py @@ -0,0 +1,37 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
+# +import logging +from pathlib import Path +from typing import Iterable + +import saneyaml + +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.importer import Importer +from vulnerabilities.importers.osv import parse_advisory_data + +logger = logging.getLogger(__name__) + + +class OSSFuzzImporter(Importer): + license_url = "https://github.com/google/oss-fuzz-vulns/blob/main/LICENSE" + spdx_license_expression = "CC-BY-4.0" + url = "git+https://github.com/google/oss-fuzz-vulns" + + def advisory_data(self) -> Iterable[AdvisoryData]: + try: + self.clone(repo_url=self.url) + path = Path(self.vcs_response.dest_dir) / "vulns" + for file in path.glob("**/*.yaml"): + with open(file) as f: + yaml_data = saneyaml.load(f.read()) + yield parse_advisory_data(yaml_data, supported_ecosystem="oss-fuzz") + finally: + if self.vcs_response: + self.vcs_response.delete() diff --git a/vulnerabilities/importers/osv.py b/vulnerabilities/importers/osv.py index c4ee58685..cb06b2162 100644 --- a/vulnerabilities/importers/osv.py +++ b/vulnerabilities/importers/osv.py @@ -27,6 +27,7 @@ from vulnerabilities.severity_systems import SCORING_SYSTEMS from vulnerabilities.utils import build_description from vulnerabilities.utils import dedupe +from vulnerabilities.utils import get_cwe_id logger = logging.getLogger(__name__) @@ -74,6 +75,9 @@ def parse_advisory_data(raw_data: dict, supported_ecosystem) -> Optional[Advisor fixed_version=version, ) ) + database_specific = raw_data.get("database_specific") or {} + cwe_ids = database_specific.get("cwe_ids") or [] + weaknesses = list(map(get_cwe_id, cwe_ids)) return AdvisoryData( aliases=aliases, @@ -81,6 +85,7 @@ def parse_advisory_data(raw_data: dict, supported_ecosystem) -> Optional[Advisor references=references, affected_packages=affected_packages, date_published=date_published, + weaknesses=weaknesses, ) diff --git a/vulnerabilities/importers/project_kb_msr2019.py b/vulnerabilities/importers/project_kb_msr2019.py index 
9d281de72..9fcfb7cc6 100644 --- a/vulnerabilities/importers/project_kb_msr2019.py +++ b/vulnerabilities/importers/project_kb_msr2019.py @@ -10,6 +10,7 @@ from vulnerabilities.importer import AdvisoryData from vulnerabilities.importer import Importer from vulnerabilities.importer import Reference +from vulnerabilities.utils import fetch_and_read_from_csv from vulnerabilities.utils import is_cve # Reading CSV file from a url using `requests` is bit too complicated. diff --git a/vulnerabilities/importers/pypa.py b/vulnerabilities/importers/pypa.py index 6ffbaae9f..9d62af48d 100644 --- a/vulnerabilities/importers/pypa.py +++ b/vulnerabilities/importers/pypa.py @@ -8,6 +8,7 @@ # import logging import os +from pathlib import Path from typing import Iterable import saneyaml @@ -23,33 +24,28 @@ class PyPaImporter(Importer): license_url = "https://github.com/pypa/advisory-database/blob/main/LICENSE" spdx_license_expression = "CC-BY-4.0" - url = "git+https://github.com/pypa/advisory-database" + repo_url = "git+https://github.com/pypa/advisory-database" def advisory_data(self) -> Iterable[AdvisoryData]: - for raw_data in fork_and_get_files(self.url): - yield parse_advisory_data(raw_data=raw_data, supported_ecosystem="pypi") + try: + self.clone(repo_url=self.repo_url) + path = Path(self.vcs_response.dest_dir) + for raw_data in fork_and_get_files(path=path): + yield parse_advisory_data(raw_data=raw_data, supported_ecosystem="pypi") + finally: + if self.vcs_response: + self.vcs_response.delete() class ForkError(Exception): pass -def fork_and_get_files(url) -> dict: +def fork_and_get_files(path) -> dict: """ Yield advisorie data mappings from the PyPA GitHub repository at ``url``. 
""" - try: - fork_directory = fetch_via_git(url=url) - except Exception as e: - logger.error(f"Failed to clone url {url}: {e}") - raise ForkError(url) from e - - advisory_dirs = os.path.join(fork_directory.dest_dir, "vulns") - for root, _, files in os.walk(advisory_dirs): - for file in files: - path = os.path.join(root, file) - if not file.endswith(".yaml"): - logger.warning(f"Unsupported non-YAML PyPA advisory file: {path}") - continue - with open(path) as f: - yield saneyaml.load(f.read()) + advisory_dirs = path / "vulns" + for file in advisory_dirs.glob("**/*.yaml"): + with open(file) as f: + yield saneyaml.load(f.read()) diff --git a/vulnerabilities/importers/redhat.py b/vulnerabilities/importers/redhat.py index 967f0dc38..4e14a7f86 100644 --- a/vulnerabilities/importers/redhat.py +++ b/vulnerabilities/importers/redhat.py @@ -8,6 +8,7 @@ # import logging +import re from typing import Dict from typing import Iterable from typing import List @@ -23,6 +24,7 @@ from vulnerabilities.importer import Reference from vulnerabilities.importer import VulnerabilitySeverity from vulnerabilities.rpm_utils import rpm_to_purl +from vulnerabilities.utils import get_cwe_id from vulnerabilities.utils import get_item from vulnerabilities.utils import requests_with_5xx_retry @@ -61,7 +63,6 @@ def get_data_from_url(url): class RedhatImporter(Importer): - spdx_license_expression = "CC-BY-4.0" license_url = "https://access.redhat.com/documentation/en-us/red_hat_security_data_api/1.0/html/red_hat_security_data_api/legal-notice" @@ -98,24 +99,12 @@ def to_advisory(advisory_data): bugzilla = advisory_data.get("bugzilla") if bugzilla: url = "https://bugzilla.redhat.com/show_bug.cgi?id={}".format(bugzilla) - bugzilla_url = f"https://bugzilla.redhat.com/rest/bug/{bugzilla}" - bugzilla_data = get_data_from_url(bugzilla_url) - bugs = bugzilla_data.get("bugs") or [] - if bugs: - # why [0] only here? 
- severity = bugs[0].get("severity") - if severity: - bugzilla_severity = VulnerabilitySeverity( - system=severity_systems.REDHAT_BUGZILLA, - value=severity, - ) - references.append( - Reference( - severities=[bugzilla_severity], - url=url, - reference_id=bugzilla, - ) - ) + references.append( + Reference( + url=url, + reference_id=bugzilla, + ) + ) for rh_adv in advisory_data.get("advisories") or []: # RH provides 3 types of advisories RHSA, RHBA, RHEA. Only RHSA's contain severity score. @@ -126,25 +115,8 @@ def to_advisory(advisory_data): continue if "RHSA" in rh_adv.upper(): - rhsa_url = f"https://access.redhat.com/hydra/rest/securitydata/cvrf/{rh_adv}.json" - rhsa_data = get_data_from_url(rhsa_url) - if not rhsa_data: - continue - rhsa_aggregate_severities = [] - if rhsa_data.get("cvrfdoc"): - # not all RHSA errata have a corresponding CVRF document - value = get_item(rhsa_data, "cvrfdoc", "aggregate_severity") - if value: - rhsa_aggregate_severities.append( - VulnerabilitySeverity( - system=severity_systems.REDHAT_AGGREGATE, - value=value, - ) - ) - references.append( Reference( - severities=rhsa_aggregate_severities, url="https://access.redhat.com/errata/{}".format(rh_adv), reference_id=rh_adv, ) @@ -164,6 +136,11 @@ def to_advisory(advisory_data): scoring_elements=cvssv3_vector, ) ) + cwe_list = [] + # cwe_string : CWE-409","CWE-121->CWE-787","(CWE-401|CWE-404)","(CWE-190|CWE-911)->CWE-416" + cwe_string = advisory_data.get("CWE") + if cwe_string: + cwe_list = list(map(get_cwe_id, re.findall("CWE-[0-9]+", cwe_string))) aliases = [] alias = advisory_data.get("CVE") @@ -177,4 +154,5 @@ def to_advisory(advisory_data): summary=advisory_data.get("bugzilla_description") or "", affected_packages=affected_packages, references=references, + weaknesses=cwe_list, ) diff --git a/vulnerabilities/importers/retiredotnet.py b/vulnerabilities/importers/retiredotnet.py index 213a21369..04bd0582c 100644 --- a/vulnerabilities/importers/retiredotnet.py +++ 
b/vulnerabilities/importers/retiredotnet.py @@ -30,7 +30,7 @@ class RetireDotnetImporter(Importer): def advisory_data(self) -> Iterable[AdvisoryData]: try: - self.clone(self.repo_url) + self.clone(repo_url=self.repo_url) path = Path(self.vcs_response.dest_dir) vuln = path / "Content" diff --git a/vulnerabilities/importers/ruby.py b/vulnerabilities/importers/ruby.py index 1e116e3ff..556e39140 100644 --- a/vulnerabilities/importers/ruby.py +++ b/vulnerabilities/importers/ruby.py @@ -18,14 +18,14 @@ from univers.versions import SemverVersion from vulnerabilities.importer import AdvisoryData -from vulnerabilities.importer import GitImporter +from vulnerabilities.importer import Importer from vulnerabilities.importer import Reference from vulnerabilities.package_managers import RubyVersionAPI from vulnerabilities.utils import load_yaml from vulnerabilities.utils import nearest_patched_package -class RubyImporter(GitImporter): +class RubyImporter(Importer): def __enter__(self): super(RubyImporter, self).__enter__() diff --git a/vulnerabilities/importers/rust.py b/vulnerabilities/importers/rust.py index 701405128..a1e97c277 100644 --- a/vulnerabilities/importers/rust.py +++ b/vulnerabilities/importers/rust.py @@ -22,13 +22,13 @@ from univers.versions import SemverVersion from vulnerabilities.importer import AdvisoryData -from vulnerabilities.importer import GitImporter +from vulnerabilities.importer import Importer from vulnerabilities.importer import Reference from vulnerabilities.package_managers import CratesVersionAPI from vulnerabilities.utils import nearest_patched_package -class RustImporter(GitImporter): +class RustImporter(Importer): def __enter__(self): super(RustImporter, self).__enter__() diff --git a/vulnerabilities/improve_runner.py b/vulnerabilities/improve_runner.py index b6658d9ea..37c0b6c35 100644 --- a/vulnerabilities/improve_runner.py +++ b/vulnerabilities/improve_runner.py @@ -10,6 +10,7 @@ import logging from datetime import datetime from datetime 
import timezone +from traceback import format_exc as traceback_format_exc from typing import List from django.core.exceptions import ValidationError @@ -43,40 +44,48 @@ def run(self) -> None: improver = self.improver_class() logger.info(f"Running improver: {improver.qualified_name}") for advisory in improver.interesting_advisories: - inferences = improver.get_inferences(advisory_data=advisory.to_advisory_data()) - process_inferences( - inferences=inferences, advisory=advisory, improver_name=improver.qualified_name - ) + logger.info(f"Processing advisory: {advisory!r}") + try: + inferences = improver.get_inferences(advisory_data=advisory.to_advisory_data()) + process_inferences( + inferences=inferences, + advisory=advisory, + improver_name=improver.qualified_name, + ) + except Exception as e: + logger.info(f"Failed to process advisory: {advisory!r} with error {e!r}") logger.info("Finished improving using %s.", self.improver_class.qualified_name) @transaction.atomic def process_inferences(inferences: List[Inference], advisory: Advisory, improver_name: str): """ - An atomic transaction that updates both the Advisory (e.g. date_improved) + Return number of inferences processed. + An atomic transaction that updates both the Advisory (e.g. date_imported) and processes the given inferences to create or update corresponding database fields. This avoids failing the entire improver when only a single inference is erroneous. Also, the atomic transaction for every advisory and its - inferences makes sure that date_improved of advisory is consistent. + inferences makes sure that date_imported of advisory is consistent. """ + inferences_processed_count = 0 if not inferences: - logger.warn(f"Nothing to improve. Source: {improver_name} Advisory id: {advisory.id}") - return + logger.warning(f"Nothing to improve. 
Source: {improver_name} Advisory id: {advisory.id}") + return inferences_processed_count logger.info(f"Improving advisory id: {advisory.id}") for inference in inferences: vulnerability = get_or_create_vulnerability_and_aliases( vulnerability_id=inference.vulnerability_id, - alias_names=inference.aliases, + aliases=inference.aliases, summary=inference.summary, ) if not vulnerability: - logger.warn(f"Unable to get vulnerability for inference: {inference!r}") + logger.warning(f"Unable to get vulnerability for inference: {inference!r}") continue for ref in inference.references: @@ -139,8 +148,9 @@ def process_inferences(inferences: List[Inference], advisory: Advisory, improver cwe_obj, created = Weakness.objects.get_or_create(cwe_id=cwe_id) cwe_obj.vulnerabilities.add(vulnerability) cwe_obj.save() - advisory.date_improved = datetime.now(timezone.utc) - advisory.save() + + inferences_processed_count += 1 + return inferences_processed_count def create_valid_vulnerability_reference(url, reference_id=None): @@ -164,69 +174,97 @@ def create_valid_vulnerability_reference(url, reference_id=None): return reference -def get_or_create_vulnerability_and_aliases(vulnerability_id, alias_names, summary): +def get_or_create_vulnerability_and_aliases( + aliases: List[str], vulnerability_id=None, summary=None +): """ Get or create vulnerabilitiy and aliases such that all existing and new aliases point to the same vulnerability """ - existing_vulns = set() - alias_names = set(alias_names) - new_alias_names = set() - for alias_name in alias_names: - try: - alias = Alias.objects.get(alias=alias_name) - existing_vulns.add(alias.vulnerability) - except Alias.DoesNotExist: - new_alias_names.add(alias_name) - - # If given set of aliases point to different vulnerabilities in the - # database, request is malformed - # TODO: It is possible that all those vulnerabilities are actually - # the same at data level, figure out a way to merge them - if len(existing_vulns) > 1: - logger.warn( - f"Given 
aliases {alias_names} already exist and do not point " - f"to a single vulnerability. Cannot improve. Skipped." - ) - return - - existing_alias_vuln = existing_vulns.pop() if existing_vulns else None - - if ( - existing_alias_vuln - and vulnerability_id - and existing_alias_vuln.vulnerability_id != vulnerability_id - ): - logger.warn( - f"Given aliases {alias_names!r} already exist and point to existing" - f"vulnerability {existing_alias_vuln}. Unable to create Vulnerability " - f"with vulnerability_id {vulnerability_id}. Skipped" - ) - return + aliases = set(alias.strip() for alias in aliases if alias and alias.strip()) + new_alias_names, existing_vulns = get_vulns_for_aliases_and_get_new_aliases(aliases) + + # All aliases must point to the same vulnerability + vulnerability = None + if existing_vulns: + if len(existing_vulns) != 1: + vcids = ", ".join(v.vulnerability_id for v in existing_vulns) + logger.error( + f"Cannot create vulnerability. " + f"Aliases {aliases} already exist and point " + f"to multiple vulnerabilities {vcids}." + ) + return + else: + vulnerability = existing_vulns.pop() + + if vulnerability_id and vulnerability.vulnerability_id != vulnerability_id: + logger.error( + f"Cannot create vulnerability. " + f"Aliases {aliases} already exist and point to a different " + f"vulnerability {vulnerability} than the requested " + f"vulnerability {vulnerability_id}." 
+ ) + return - if existing_alias_vuln: - vulnerability = existing_alias_vuln - elif vulnerability_id: + if vulnerability_id and not vulnerability: try: vulnerability = Vulnerability.objects.get(vulnerability_id=vulnerability_id) except Vulnerability.DoesNotExist: - logger.warn( - f"Given vulnerability_id: {vulnerability_id} does not exist in the database" - ) + logger.error(f"Cannot get requested vulnerability {vulnerability_id}.") return + if vulnerability: + # TODO: We should keep multiple summaries, one for each advisory + # if summary and summary != vulnerability.summary: + # logger.warning( + # f"Inconsistent summary for {vulnerability.vulnerability_id}. " + # f"Existing: {vulnerability.summary!r}, provided: {summary!r}" + # ) + associate_vulnerability_with_aliases(vulnerability=vulnerability, aliases=new_alias_names) else: - vulnerability = Vulnerability(summary=summary) - vulnerability.save() + try: + vulnerability = create_vulnerability_and_add_aliases( + aliases=new_alias_names, summary=summary + ) + except Exception as e: + logger.error( + f"Cannot create vulnerability with summary {summary!r} and {new_alias_names!r} {e!r}.\n{traceback_format_exc()}." + ) + return + + return vulnerability + + +def get_vulns_for_aliases_and_get_new_aliases(aliases): + """ + Return ``new_aliases`` that are not in the database and + ``existing_vulns`` that point to the given ``aliases``. + """ + new_aliases = set(aliases) + existing_vulns = set() + for alias in Alias.objects.filter(alias__in=aliases): + existing_vulns.add(alias.vulnerability) + new_aliases.remove(alias.alias) + return new_aliases, existing_vulns - if summary and summary != vulnerability.summary: - logger.warn( - f"Inconsistent summary for {vulnerability!r}. 
" - f"Existing: {vulnerability.summary}, provided: {summary}" - ) - for alias_name in new_alias_names: +@transaction.atomic +def create_vulnerability_and_add_aliases(aliases, summary): + """ + Return a new ``vulnerability`` created with ``summary`` + and associate the ``vulnerability`` with ``aliases``. + Raise exception if no alias is associated with the ``vulnerability``. + """ + vulnerability = Vulnerability(summary=summary) + vulnerability.save() + associate_vulnerability_with_aliases(aliases, vulnerability) + if not vulnerability.aliases.count(): + raise Exception(f"Vulnerability {vulnerability.vcid} must have one or more aliases") + return vulnerability + + +def associate_vulnerability_with_aliases(aliases, vulnerability): + for alias_name in aliases: alias = Alias(alias=alias_name, vulnerability=vulnerability) alias.save() logger.info(f"New alias for {vulnerability!r}: {alias_name}") - - return vulnerability diff --git a/vulnerabilities/improver.py b/vulnerabilities/improver.py index 9b46cce0a..a7554001f 100644 --- a/vulnerabilities/improver.py +++ b/vulnerabilities/improver.py @@ -78,7 +78,7 @@ def to_dict(self): """ return { "vulnerability_id": self.vulnerability_id, - "aliases": [alias for alias in self.aliases], + "aliases": self.aliases, "confidence": self.confidence, "summary": self.summary, "affected_purls": [affected_purl.to_dict() for affected_purl in self.affected_purls], @@ -91,7 +91,7 @@ def to_dict(self): def from_advisory_data(cls, advisory_data, confidence, fixed_purl, affected_purls=None): """ Return an Inference object while keeping the same values as of advisory_data - for vulnerability_id, summary and references + for aliases, summary and references """ return cls( aliases=advisory_data.aliases, diff --git a/vulnerabilities/improvers/__init__.py b/vulnerabilities/improvers/__init__.py index 50e8ead0a..35aef82b4 100644 --- a/vulnerabilities/improvers/__init__.py +++ b/vulnerabilities/improvers/__init__.py @@ -7,20 +7,25 @@ # See 
https://aboutcode.org for more information about nexB OSS projects. # -from vulnerabilities import importers -from vulnerabilities.improvers import default -from vulnerabilities.improvers import oval +from vulnerabilities.improvers import valid_versions + +# from vulnerabilities.improvers import vulnerability_status IMPROVERS_REGISTRY = [ - default.DefaultImprover, - importers.nginx.NginxBasicImprover, - importers.github.GitHubBasicImprover, - importers.debian.DebianBasicImprover, - importers.gitlab.GitLabBasicImprover, - importers.istio.IstioImprover, - oval.DebianOvalBasicImprover, - oval.UbuntuOvalBasicImprover, - importers.apache_httpd.ApacheHTTPDImprover, + valid_versions.GitHubBasicImprover, + valid_versions.GitLabBasicImprover, + valid_versions.NginxBasicImprover, + valid_versions.ApacheHTTPDImprover, + valid_versions.DebianBasicImprover, + valid_versions.NpmImprover, + valid_versions.ElixirImprover, + valid_versions.ApacheTomcatImprover, + valid_versions.ApacheKafkaImprover, + valid_versions.IstioImprover, + valid_versions.DebianOvalImprover, + valid_versions.UbuntuOvalImprover, + valid_versions.OSSFuzzImprover, + # vulnerability_status.VulnerabilityStatusImprover, ] IMPROVERS_REGISTRY = {x.qualified_name: x for x in IMPROVERS_REGISTRY} diff --git a/vulnerabilities/improvers/default.py b/vulnerabilities/improvers/default.py index 5c00a28c5..1b67eee5c 100644 --- a/vulnerabilities/improvers/default.py +++ b/vulnerabilities/improvers/default.py @@ -12,11 +12,13 @@ from typing import List from typing import Tuple +from django.db.models import Q from django.db.models.query import QuerySet from packageurl import PackageURL from vulnerabilities.importer import AdvisoryData from vulnerabilities.importer import AffectedPackage +from vulnerabilities.importer import Importer from vulnerabilities.improver import MAX_CONFIDENCE from vulnerabilities.improver import Improver from vulnerabilities.improver import Inference @@ -34,9 +36,17 @@ class DefaultImprover(Improver): 
information source. """ + importer: Importer + @property def interesting_advisories(self) -> QuerySet: - return Advisory.objects.all() + if hasattr(self, "importer"): + return ( + Advisory.objects.filter(Q(created_by=self.importer.qualified_name)) + .order_by("-date_collected") + .paginated() + ) + return Advisory.objects.all().order_by("-date_collected").paginated() def get_inferences(self, advisory_data: AdvisoryData) -> Iterable[Inference]: if not advisory_data: @@ -54,6 +64,7 @@ def get_inferences(self, advisory_data: AdvisoryData) -> Iterable[Inference]: affected_purls=affected_purls, fixed_purl=None, references=advisory_data.references, + weaknesses=advisory_data.weaknesses, ) else: for fixed_purl in fixed_purls or []: @@ -64,6 +75,7 @@ def get_inferences(self, advisory_data: AdvisoryData) -> Iterable[Inference]: affected_purls=affected_purls, fixed_purl=fixed_purl, references=advisory_data.references, + weaknesses=advisory_data.weaknesses, ) else: @@ -94,11 +106,11 @@ def get_exact_purls(affected_package: AffectedPackage) -> Tuple[List[PackageURL] >>> assert expected == got """ - vr = affected_package.affected_version_range - # We need ``if c`` below because univers returns None as version - # in case of vers:nginx/* - # TODO: Revisit after https://github.com/nexB/univers/issues/33 try: + vr = affected_package.affected_version_range + # We need ``if c`` below because univers returns None as version + # in case of vers:nginx/* + # TODO: Revisit after https://github.com/nexB/univers/issues/33 affected_purls = [] fixed_versions = [] if vr: @@ -120,5 +132,14 @@ def get_exact_purls(affected_package: AffectedPackage) -> Tuple[List[PackageURL] ] return affected_purls, fixed_purls except Exception as e: - logger.error(f"Failed to get exact purls for {affected_package} {e}") + logger.error(f"Failed to get exact purls for: {affected_package!r} with error: {e!r}") return [], [] + + +class DefaultImporter(DefaultImprover): + def __init__(self, advisories) -> None: + 
self.advisories = advisories + + @property + def interesting_advisories(self) -> QuerySet: + return self.advisories diff --git a/vulnerabilities/improvers/oval.py b/vulnerabilities/improvers/oval.py deleted file mode 100644 index f489dab8a..000000000 --- a/vulnerabilities/improvers/oval.py +++ /dev/null @@ -1,177 +0,0 @@ -import logging -from datetime import datetime -from typing import Iterable -from typing import List -from typing import Mapping -from typing import Optional - -from django.db.models import Q -from django.db.models.query import QuerySet -from packageurl import PackageURL - -from vulnerabilities.importer import AdvisoryData -from vulnerabilities.importer import AffectedPackage -from vulnerabilities.importer import UnMergeablePackageError -from vulnerabilities.importers.debian_oval import DebianOvalImporter -from vulnerabilities.importers.github import get_api_package_name -from vulnerabilities.importers.github import resolve_version_range -from vulnerabilities.importers.ubuntu import UbuntuImporter -from vulnerabilities.improver import Improver -from vulnerabilities.improver import Inference -from vulnerabilities.models import Advisory -from vulnerabilities.package_managers import DebianVersionAPI -from vulnerabilities.package_managers import LaunchpadVersionAPI -from vulnerabilities.package_managers import VersionAPI -from vulnerabilities.utils import AffectedPackage as LegacyAffectedPackage -from vulnerabilities.utils import get_affected_packages_by_patched_package -from vulnerabilities.utils import nearest_patched_package - -logger = logging.getLogger(__name__) - - -VERSION_API_CLASS_BY_NAMESPACE = { - "debian": DebianVersionAPI, - "ubuntu": LaunchpadVersionAPI, -} - - -def get_package_versions( - package_url: PackageURL, - until: Optional[datetime] = None, - versions_fetcher_by_purl: Mapping[PackageURL, VersionAPI] = {}, -) -> List[str]: - """ - Return a list of `valid_versions` for the `package_url` - """ - api_name = 
get_api_package_name(package_url) - if not api_name: - logger.error(f"Could not get versions for {package_url!r}") - return [] - versions_fetcher = versions_fetcher_by_purl.get(package_url) - if not versions_fetcher: - versions_fetcher: VersionAPI = VERSION_API_CLASS_BY_NAMESPACE[package_url.namespace] - versions_fetcher_by_purl[package_url] = versions_fetcher() - - versions_fetcher = versions_fetcher_by_purl[package_url] - - versions_fetcher_by_purl[package_url] = versions_fetcher - return versions_fetcher.get_until(package_name=api_name, until=until).valid_versions - - -class DebianOvalBasicImprover(Improver): - def __init__(self) -> None: - self.versions_fetcher_by_purl: Mapping[str, VersionAPI] = {} - - @property - def interesting_advisories(self) -> QuerySet: - return Advisory.objects.filter(Q(created_by=DebianOvalImporter.qualified_name)) - - def get_inferences(self, advisory_data: AdvisoryData) -> Iterable[Inference]: - """ - Yield Inferences for the given advisory data - """ - if not advisory_data.affected_packages: - return - try: - purl, affected_version_ranges, _ = AffectedPackage.merge( - advisory_data.affected_packages - ) - except UnMergeablePackageError: - logger.error(f"Cannot merge with different purls {advisory_data.affected_packages!r}") - return iter([]) - - pkg_type = purl.type - pkg_namespace = purl.namespace - pkg_name = purl.name - valid_versions = get_package_versions( - package_url=purl, - until=advisory_data.date_published, - versions_fetcher_by_purl=self.versions_fetcher_by_purl, - ) - - for affected_version_range in affected_version_ranges: - aff_vers, unaff_vers = resolve_version_range( - affected_version_range=affected_version_range, - package_versions=valid_versions, - ) - affected_purls = [ - PackageURL(type=pkg_type, namespace=pkg_namespace, name=pkg_name, version=version) - for version in aff_vers - ] - - unaffected_purls = [ - PackageURL(type=pkg_type, namespace=pkg_namespace, name=pkg_name, version=version) - for version in 
unaff_vers - ] - - affected_packages: List[LegacyAffectedPackage] = nearest_patched_package( - vulnerable_packages=affected_purls, resolved_packages=unaffected_purls - ) - - for ( - fixed_package, - affected_packages, - ) in get_affected_packages_by_patched_package(affected_packages).items(): - yield Inference.from_advisory_data( - advisory_data, - confidence=100, # We are getting all valid versions to get this inference - affected_purls=affected_packages, - fixed_purl=fixed_package, - ) - - -class UbuntuOvalBasicImprover(Improver): - def __init__(self) -> None: - self.versions_fetcher_by_purl: Mapping[str, VersionAPI] = {} - - @property - def interesting_advisories(self) -> QuerySet: - return Advisory.objects.filter(Q(created_by=UbuntuImporter.qualified_name)) - - def get_inferences(self, advisory_data: AdvisoryData) -> Iterable[Inference]: - """ - Yield Inferences for the given advisory data - """ - if not advisory_data.affected_packages: - return - - for affected_package in advisory_data.affected_packages: - purl = affected_package.package - affected_version_range = affected_package.affected_version_range - pkg_type = purl.type - pkg_namespace = purl.namespace - pkg_name = purl.name - valid_versions = get_package_versions( - package_url=purl, - until=advisory_data.date_published, - versions_fetcher_by_purl=self.versions_fetcher_by_purl, - ) - - aff_vers, unaff_vers = resolve_version_range( - affected_version_range=affected_version_range, - package_versions=valid_versions, - ) - affected_purls = [ - PackageURL(type=pkg_type, namespace=pkg_namespace, name=pkg_name, version=version) - for version in aff_vers - ] - - unaffected_purls = [ - PackageURL(type=pkg_type, namespace=pkg_namespace, name=pkg_name, version=version) - for version in unaff_vers - ] - - affected_packages: List[LegacyAffectedPackage] = nearest_patched_package( - vulnerable_packages=affected_purls, resolved_packages=unaffected_purls - ) - - for ( - fixed_package, - affected_packages, - ) in 
get_affected_packages_by_patched_package(affected_packages).items(): - yield Inference.from_advisory_data( - advisory_data, - confidence=100, # We are getting all valid versions to get this inference - affected_purls=affected_packages, - fixed_purl=fixed_package, - ) diff --git a/vulnerabilities/improvers/valid_versions.py b/vulnerabilities/improvers/valid_versions.py new file mode 100644 index 000000000..61f62d5a7 --- /dev/null +++ b/vulnerabilities/improvers/valid_versions.py @@ -0,0 +1,485 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +import dataclasses +import logging +from datetime import datetime +from typing import Iterable +from typing import List +from typing import Mapping +from typing import Optional + +from django.db.models import Q +from django.db.models.query import QuerySet +from packageurl import PackageURL +from univers.versions import NginxVersion + +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.importer import AffectedPackage +from vulnerabilities.importer import Importer +from vulnerabilities.importer import UnMergeablePackageError +from vulnerabilities.importers.apache_httpd import ApacheHTTPDImporter +from vulnerabilities.importers.apache_kafka import ApacheKafkaImporter +from vulnerabilities.importers.apache_tomcat import ApacheTomcatImporter +from vulnerabilities.importers.debian import DebianImporter +from vulnerabilities.importers.debian_oval import DebianOvalImporter +from vulnerabilities.importers.elixir_security import ElixirSecurityImporter +from vulnerabilities.importers.github import GitHubAPIImporter +from vulnerabilities.importers.gitlab import GitLabAPIImporter +from 
vulnerabilities.importers.istio import IstioImporter +from vulnerabilities.importers.nginx import NginxImporter +from vulnerabilities.importers.npm import NpmImporter +from vulnerabilities.importers.oss_fuzz import OSSFuzzImporter +from vulnerabilities.importers.ubuntu import UbuntuImporter +from vulnerabilities.improver import MAX_CONFIDENCE +from vulnerabilities.improver import Improver +from vulnerabilities.improver import Inference +from vulnerabilities.models import Advisory +from vulnerabilities.package_managers import GitHubTagsAPI +from vulnerabilities.package_managers import GoproxyVersionAPI +from vulnerabilities.package_managers import PackageVersion +from vulnerabilities.package_managers import VersionAPI +from vulnerabilities.package_managers import get_api_package_name +from vulnerabilities.package_managers import get_version_fetcher +from vulnerabilities.utils import AffectedPackage as LegacyAffectedPackage +from vulnerabilities.utils import clean_nginx_git_tag +from vulnerabilities.utils import evolve_purl +from vulnerabilities.utils import get_affected_packages_by_patched_package +from vulnerabilities.utils import is_vulnerable_nginx_version +from vulnerabilities.utils import nearest_patched_package +from vulnerabilities.utils import resolve_version_range + +logger = logging.getLogger(__name__) + + +@dataclasses.dataclass(order=True) +class ValidVersionImprover(Improver): + importer: Importer + ignorable_versions: List[str] = dataclasses.field(default_factory=list) + + def __init__(self) -> None: + self.versions_fetcher_by_purl: Mapping[str, VersionAPI] = {} + + @property + def interesting_advisories(self) -> QuerySet: + return Advisory.objects.filter(Q(created_by=self.importer.qualified_name)).paginated() + + def get_package_versions( + self, package_url: PackageURL, until: Optional[datetime] = None + ) -> List[str]: + """ + Return a list of `valid_versions` for the `package_url` + """ + api_name = get_api_package_name(package_url) + if not 
api_name: + logger.error(f"Could not get versions for {package_url!r}") + return [] + versions_fetcher = self.versions_fetcher_by_purl.get(package_url) + if not versions_fetcher: + versions_fetcher = get_version_fetcher(package_url) + self.versions_fetcher_by_purl[package_url] = versions_fetcher() + + versions_fetcher = self.versions_fetcher_by_purl[package_url] + + self.versions_fetcher_by_purl[package_url] = versions_fetcher + return versions_fetcher.get_until(package_name=api_name, until=until).valid_versions + + def get_inferences(self, advisory_data: AdvisoryData) -> Iterable[Inference]: + """ + Yield Inferences for the given advisory data + """ + mergable = True + if not advisory_data.affected_packages: + return + + try: + purl, affected_version_ranges, fixed_versions = AffectedPackage.merge( + advisory_data.affected_packages + ) + except UnMergeablePackageError: + logger.error(f"Cannot merge with different purls {advisory_data.affected_packages!r}") + mergable = False + + if not mergable: + for affected_package in advisory_data.affected_packages: + purl = affected_package.package + affected_version_range = affected_package.affected_version_range + fixed_version = affected_package.fixed_version + pkg_type = purl.type + pkg_namespace = purl.namespace + pkg_name = purl.name + if not affected_version_range and fixed_version: + yield Inference.from_advisory_data( + advisory_data, # We are getting all valid versions to get this inference + confidence=MAX_CONFIDENCE, + affected_purls=[], + fixed_purl=PackageURL( + type=pkg_type, + namespace=pkg_namespace, + name=pkg_name, + version=str(fixed_version), + ), + ) + else: + valid_versions = self.get_package_versions( + package_url=purl, until=advisory_data.date_published + ) + yield from self.generate_inferences( + affected_version_range=affected_version_range, + pkg_type=pkg_type, + pkg_namespace=pkg_namespace, + pkg_name=pkg_name, + valid_versions=valid_versions, + advisory_data=advisory_data, + ) + + else: + 
pkg_type = purl.type + pkg_namespace = purl.namespace + pkg_name = purl.name + pkg_qualifiers = purl.qualifiers + fixed_purls = [ + PackageURL( + type=pkg_type, + namespace=pkg_namespace, + name=pkg_name, + version=str(version), + qualifiers=pkg_qualifiers, + ) + for version in fixed_versions + ] + if not affected_version_ranges: + for fixed_purl in fixed_purls or []: + yield Inference.from_advisory_data( + advisory_data, # We are getting all valid versions to get this inference + confidence=MAX_CONFIDENCE, + affected_purls=[], + fixed_purl=fixed_purl, + ) + else: + if purl.type == "golang": + # Problem with the Golang and Go that they provide full path + # FIXME: We need to get the PURL subpath for Go module + versions_fetcher = self.versions_fetcher_by_purl.get(purl) + if not versions_fetcher: + versions_fetcher = GoproxyVersionAPI() + self.versions_fetcher_by_purl[purl] = versions_fetcher + pkg_name = versions_fetcher.module_name_by_package_name.get(pkg_name, pkg_name) + + valid_versions = self.get_package_versions( + package_url=purl, until=advisory_data.date_published + ) + for affected_version_range in affected_version_ranges: + yield from self.generate_inferences( + affected_version_range=affected_version_range, + pkg_type=pkg_type, + pkg_namespace=pkg_namespace, + pkg_name=pkg_name, + valid_versions=valid_versions, + advisory_data=advisory_data, + ) + + def generate_inferences( + self, + affected_version_range, + pkg_type, + pkg_namespace, + pkg_name, + valid_versions, + advisory_data, + ): + """ + Generate Inferences for the given `affected_version_range` and `valid_versions` + """ + aff_vers, unaff_vers = resolve_version_range( + affected_version_range=affected_version_range, + ignorable_versions=self.ignorable_versions, + package_versions=valid_versions, + ) + + affected_purls = list( + self.expand_verion_range_to_purls(pkg_type, pkg_namespace, pkg_name, aff_vers) + ) + + unaffected_purls = list( + self.expand_verion_range_to_purls(pkg_type, 
pkg_namespace, pkg_name, unaff_vers) + ) + + affected_packages: List[LegacyAffectedPackage] = nearest_patched_package( + vulnerable_packages=affected_purls, resolved_packages=unaffected_purls + ) + + for ( + fixed_package, + affected_purls, + ) in get_affected_packages_by_patched_package(affected_packages).items(): + yield Inference.from_advisory_data( + advisory_data, + confidence=100, # We are getting all valid versions to get this inference + affected_purls=affected_purls, + fixed_purl=fixed_package, + ) + + def expand_verion_range_to_purls(self, pkg_type, pkg_namespace, pkg_name, versions): + for version in versions: + yield PackageURL(type=pkg_type, namespace=pkg_namespace, name=pkg_name, version=version) + + +class NginxBasicImprover(Improver): + """ + Improve Nginx data by fetching the its GitHub repo versions and resolving + the vulnerable ranges. + """ + + @property + def interesting_advisories(self) -> QuerySet: + return Advisory.objects.filter(created_by=NginxImporter.qualified_name).paginated() + + def get_inferences(self, advisory_data: AdvisoryData) -> Iterable[Inference]: + all_versions = list(self.fetch_nginx_version_from_git_tags()) + yield from self.get_inferences_from_versions( + advisory_data=advisory_data, all_versions=all_versions + ) + + def get_inferences_from_versions( + self, advisory_data: AdvisoryData, all_versions: List[PackageVersion] + ) -> Iterable[Inference]: + """ + Yield inferences given an ``advisory_data`` and a ``all_versions`` of + PackageVersion. 
+ """ + + try: + purl, affected_version_ranges, fixed_versions = AffectedPackage.merge( + advisory_data.affected_packages + ) + except UnMergeablePackageError: + logger.error( + f"NginxBasicImprover: Cannot merge with different purls: " + f"{advisory_data.affected_packages!r}" + ) + return iter([]) + + affected_purls = [] + for affected_version_range in affected_version_ranges: + for package_version in all_versions: + # FIXME: we should reference an NginxVersion tbd in univers + version = NginxVersion(package_version.value) + if is_vulnerable_nginx_version( + version=version, + affected_version_range=affected_version_range, + fixed_versions=fixed_versions, + ): + new_purl = evolve_purl(purl=purl, version=str(version)) + affected_purls.append(new_purl) + + # TODO: This also yields with a lower fixed version, maybe we should + # only yield fixes that are upgrades ? + for fixed_version in fixed_versions: + fixed_purl = evolve_purl(purl=purl, version=str(fixed_version)) + + yield Inference.from_advisory_data( + advisory_data, + # TODO: is 90 a correct confidence?? + confidence=90, + affected_purls=affected_purls, + fixed_purl=fixed_purl, + ) + + def fetch_nginx_version_from_git_tags(self): + """ + Yield all nginx PackageVersion from its git tags. 
+ """ + nginx_versions = GitHubTagsAPI().fetch("nginx/nginx") + for version in nginx_versions: + cleaned = clean_nginx_git_tag(version.value) + yield PackageVersion(value=cleaned, release_date=version.release_date) + + +class ApacheHTTPDImprover(ValidVersionImprover): + importer = ApacheHTTPDImporter + ignorable_versions = { + "AGB_BEFORE_AAA_CHANGES", + "APACHE_1_2b1", + "APACHE_1_2b10", + "APACHE_1_2b11", + "APACHE_1_2b2", + "APACHE_1_2b3", + "APACHE_1_2b4", + "APACHE_1_2b5", + "APACHE_1_2b6", + "APACHE_1_2b7", + "APACHE_1_2b8", + "APACHE_1_2b9", + "APACHE_1_3_PRE_NT", + "APACHE_1_3a1", + "APACHE_1_3b1", + "APACHE_1_3b2", + "APACHE_1_3b3", + "APACHE_1_3b5", + "APACHE_1_3b6", + "APACHE_1_3b7", + "APACHE_2_0_2001_02_09", + "APACHE_2_0_52_WROWE_RC1", + "APACHE_2_0_ALPHA", + "APACHE_2_0_ALPHA_2", + "APACHE_2_0_ALPHA_3", + "APACHE_2_0_ALPHA_4", + "APACHE_2_0_ALPHA_5", + "APACHE_2_0_ALPHA_6", + "APACHE_2_0_ALPHA_7", + "APACHE_2_0_ALPHA_8", + "APACHE_2_0_ALPHA_9", + "APACHE_2_0_BETA_CANDIDATE_1", + "APACHE_BIG_SYMBOL_RENAME_POST", + "APACHE_BIG_SYMBOL_RENAME_PRE", + "CHANGES", + "HTTPD_LDAP_1_0_0", + "INITIAL", + "MOD_SSL_2_8_3", + "PCRE_3_9", + "POST_APR_SPLIT", + "PRE_APR_CHANGES", + "STRIKER_2_0_51_RC1", + "STRIKER_2_0_51_RC2", + "STRIKER_2_1_0_RC1", + "WROWE_2_0_43_PRE1", + "apache-1_3-merge-1-post", + "apache-1_3-merge-1-pre", + "apache-1_3-merge-2-post", + "apache-1_3-merge-2-pre", + "apache-apr-merge-3", + "apache-doc-split-01", + "dg_last_1_2_doc_merge", + "djg-apache-nspr-07", + "djg_nspr_split", + "moving_to_httpd_module", + "mpm-3", + "mpm-merge-1", + "mpm-merge-2", + "post_ajp_proxy", + "pre_ajp_proxy", + } + + +class ApacheTomcatImprover(ValidVersionImprover): + importer = ApacheTomcatImporter + ignorable_versions = [] + + +class ApacheKafkaImprover(ValidVersionImprover): + importer = ApacheKafkaImporter + ignorable_versions = [] + + +class DebianBasicImprover(ValidVersionImprover): + importer = DebianImporter + ignorable_versions = [] + + +class 
GitLabBasicImprover(ValidVersionImprover): + importer = GitLabAPIImporter + ignorable_versions = [] + + +class GitHubBasicImprover(ValidVersionImprover): + importer = GitHubAPIImporter + ignorable_versions = frozenset( + [ + "0.1-bulbasaur", + "0.1-charmander", + "0.3m1", + "0.3m2", + "0.3m3", + "0.3m4", + "0.3m5", + "0.4m1", + "0.4m2", + "0.4m3", + "0.4m4", + "0.4m5", + "0.5m1", + "0.5m2", + "0.5m3", + "0.5m4", + "0.5m5", + "0.6m1", + "0.6m2", + "0.6m3", + "0.6m4", + "0.6m5", + "0.6m6", + "0.7.10p1", + "0.7.11p1", + "0.7.11p2", + "0.7.11p3", + "0.8.1p1", + "0.8.3p1", + "0.8.4p1", + "0.8.4p2", + "0.8.6p1", + "0.8.7p1", + "0.9-doduo", + "0.9-eevee", + "0.9-fearow", + "0.9-gyarados", + "0.9-horsea", + "0.9-ivysaur", + "2013-01-21T20:33:09+0100", + "2013-01-23T17:11:52+0100", + "2013-02-01T20:50:46+0100", + "2013-02-02T19:59:03+0100", + "2013-02-02T20:23:17+0100", + "2013-02-08T17:40:57+0000", + "2013-03-27T16:32:26+0100", + "2013-05-09T12:47:53+0200", + "2013-05-10T17:55:56+0200", + "2013-05-14T20:16:05+0200", + "2013-06-01T10:32:51+0200", + "2013-07-19T09:11:08+0000", + "2013-08-12T21:48:56+0200", + "2013-09-11T19-27-10", + "2013-12-23T17-51-15", + "2014-01-12T15-52-10", + "2.0.1rc2-git", + "3.0.0b3-", + "3.0b6dev-r41684", + "-class.-jw.util.version.Version-", + ] + ) + + +class NpmImprover(ValidVersionImprover): + importer = NpmImporter + ignorable_versions = [] + + +class ElixirImprover(ValidVersionImprover): + importer = ElixirSecurityImporter + ignorable_versions = [] + + +class IstioImprover(ValidVersionImprover): + importer = IstioImporter + ignorable_versions = [] + + +class DebianOvalImprover(ValidVersionImprover): + importer = DebianOvalImporter + ignorable_versions = [] + + +class UbuntuOvalImprover(ValidVersionImprover): + importer = UbuntuImporter + ignorable_versions = [] + + +class OSSFuzzImprover(ValidVersionImprover): + importer = OSSFuzzImporter + ignorable_versions = [] diff --git a/vulnerabilities/improvers/vulnerability_status.py 
b/vulnerabilities/improvers/vulnerability_status.py new file mode 100644 index 000000000..b6db7d0d2 --- /dev/null +++ b/vulnerabilities/improvers/vulnerability_status.py @@ -0,0 +1,87 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + + +from typing import Iterable +from urllib.parse import urljoin + +from django.db.models import Q +from django.db.models.query import QuerySet + +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.importers.nvd import NVDImporter +from vulnerabilities.improver import Improver +from vulnerabilities.improver import Inference +from vulnerabilities.models import Advisory +from vulnerabilities.models import Alias +from vulnerabilities.models import Vulnerability +from vulnerabilities.models import VulnerabilityStatusType +from vulnerabilities.utils import fetch_response +from vulnerabilities.utils import get_item + +MITRE_API_URL = "https://cveawg.mitre.org/api/cve/" + + +class VulnerabilityStatusImprover(Improver): + """ + Update vulnerability with NVD statues + """ + + @property + def interesting_advisories(self) -> QuerySet: + return ( + Advisory.objects.filter(Q(created_by=NVDImporter.qualified_name)) + .distinct("aliases") + .paginated() + ) + + def get_inferences(self, advisory_data: AdvisoryData) -> Iterable[Inference]: + """ + This is a work-around until we have new style importer and improver + and this get_inferences function updates the vulnerability status directly + # TODO: Replace this with new style improvers + """ + if not advisory_data: + return [] + aliases = advisory_data.aliases + # NVD Importer only has one alias in it and this a CVE + assert len(aliases) == 1 + cve_id = 
aliases[0] + if not cve_id.startswith("CVE"): + return [] + + alias = Alias.objects.get(alias=cve_id) + vulnerabilities = Vulnerability.objects.filter(aliases__alias=alias).distinct() + + for vuln in vulnerabilities: + status = get_status_from_api(cve_id=cve_id) + if not status: + status = VulnerabilityStatusType.PUBLISHED + vuln.status = status + vuln.save() + return [] + + +def get_status_from_api(cve_id): + """ + Return the CVE status from the MITRE API + """ + url = urljoin(MITRE_API_URL, cve_id) + try: + response = fetch_response(url=url) + except Exception as e: + return + response = response.json() + cve_state = get_item(response, "cveMetadata", "state") or None + tags = get_item(response, "containers", "cna", "tags") or [] + if "disputed" in tags: + return VulnerabilityStatusType.DISPUTED + if cve_state and cve_state == "REJECTED": + return VulnerabilityStatusType.INVALID + return VulnerabilityStatusType.PUBLISHED diff --git a/vulnerabilities/management/commands/import.py b/vulnerabilities/management/commands/import.py index 77ae5b6a6..5ae885299 100644 --- a/vulnerabilities/management/commands/import.py +++ b/vulnerabilities/management/commands/import.py @@ -29,19 +29,20 @@ def add_arguments(self, parser): parser.add_argument("sources", nargs="*", help="Fully qualified importer name to run") def handle(self, *args, **options): - if options["list"]: - self.list_sources() - return - - if options["all"]: - self.import_data(importers=IMPORTERS_REGISTRY.values()) - return - - sources = options["sources"] - if not sources: - raise CommandError('Please provide at least one importer to run or use "--all".') - - self.import_data(validate_importers(sources)) + try: + if options["list"]: + self.list_sources() + elif options["all"]: + self.import_data(importers=IMPORTERS_REGISTRY.values()) + else: + sources = options["sources"] + if not sources: + raise CommandError( + 'Please provide at least one importer to run or use "--all".' 
+ ) + self.import_data(validate_importers(sources)) + except KeyboardInterrupt: + raise CommandError("Keyboard interrupt received. Stopping...") def list_sources(self): self.stdout.write("Vulnerability data can be imported from the following importers:") diff --git a/vulnerabilities/management/commands/improve.py b/vulnerabilities/management/commands/improve.py index 6055798f8..e14c2bacc 100644 --- a/vulnerabilities/management/commands/improve.py +++ b/vulnerabilities/management/commands/improve.py @@ -31,19 +31,20 @@ def add_arguments(self, parser): parser.add_argument("sources", nargs="*", help="Fully qualified improver name to run") def handle(self, *args, **options): - if options["list"]: - self.list_sources() - return - - if options["all"]: - self.improve_data(IMPROVERS_REGISTRY.values()) - return - - sources = options["sources"] - if not sources: - raise CommandError('Please provide at least one improver to run or use "--all".') - - self.improve_data(validate_improvers(sources)) + try: + if options["list"]: + self.list_sources() + elif options["all"]: + self.improve_data(IMPROVERS_REGISTRY.values()) + else: + sources = options["sources"] + if not sources: + raise CommandError( + 'Please provide at least one improver to run or use "--all".' + ) + self.improve_data(validate_improvers(sources)) + except KeyboardInterrupt: + raise CommandError("Keyboard interrupt received. 
Stopping...") def list_sources(self): improvers = list(IMPROVERS_REGISTRY) @@ -56,7 +57,7 @@ def improve_data(self, improvers): for improver in improvers: self.stdout.write(f"Improving data using {improver.qualified_name}") try: - ImproveRunner(improver).run() + ImproveRunner(improver_class=improver).run() self.stdout.write( self.style.SUCCESS( f"Successfully improved data using {improver.qualified_name}" diff --git a/vulnerabilities/middleware/ban_user_agent.py b/vulnerabilities/middleware/ban_user_agent.py new file mode 100644 index 000000000..6aafc490c --- /dev/null +++ b/vulnerabilities/middleware/ban_user_agent.py @@ -0,0 +1,18 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
+# + +from django.http import HttpResponseNotFound +from django.utils.deprecation import MiddlewareMixin + + +class BanUserAgent(MiddlewareMixin): + def process_request(self, request): + user_agent = request.META.get("HTTP_USER_AGENT", None) + if user_agent and "bytedance" in user_agent: + return HttpResponseNotFound(404) diff --git a/vulnerabilities/migrations/0040_remove_advisory_date_improved_advisory_date_imported.py b/vulnerabilities/migrations/0040_remove_advisory_date_improved_advisory_date_imported.py new file mode 100644 index 000000000..c609f20f8 --- /dev/null +++ b/vulnerabilities/migrations/0040_remove_advisory_date_improved_advisory_date_imported.py @@ -0,0 +1,24 @@ +# Generated by Django 4.1.7 on 2023-09-05 09:44 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("vulnerabilities", "0039_alter_vulnerabilityseverity_scoring_system"), + ] + + operations = [ + migrations.RemoveField( + model_name="advisory", + name="date_improved", + ), + migrations.AddField( + model_name="advisory", + name="date_imported", + field=models.DateTimeField( + blank=True, help_text="UTC Date on which the advisory was imported", null=True + ), + ), + ] diff --git a/vulnerabilities/migrations/0041_remove_vulns_with_empty_aliases.py b/vulnerabilities/migrations/0041_remove_vulns_with_empty_aliases.py new file mode 100644 index 000000000..d2c44c280 --- /dev/null +++ b/vulnerabilities/migrations/0041_remove_vulns_with_empty_aliases.py @@ -0,0 +1,37 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
+# + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ("vulnerabilities", "0040_remove_advisory_date_improved_advisory_date_imported"), + ] + + def remove_vulns_with_empty_aliases(apps, _): + Vulnerability = apps.get_model("vulnerabilities", "Vulnerability") + Package = apps.get_model("vulnerabilities", "Package") + packages = [] + vulnerabilities = [] + for vuln in Vulnerability.objects.filter(aliases=None).prefetch_related( + "packages" + ): + # Delete packages associated with that vulnerability + for package in vuln.packages.all(): + packages.append(package.id) + vulnerabilities.append(vuln.id) + + Vulnerability.objects.filter(id__in=vulnerabilities).delete() + Package.objects.filter(id__in=packages).delete() + + operations = [ + migrations.RunPython(remove_vulns_with_empty_aliases, reverse_code=migrations.RunPython.noop), + ] diff --git a/vulnerabilities/migrations/0042_advisory_status_vulnerability_status.py b/vulnerabilities/migrations/0042_advisory_status_vulnerability_status.py new file mode 100644 index 000000000..6fbae367e --- /dev/null +++ b/vulnerabilities/migrations/0042_advisory_status_vulnerability_status.py @@ -0,0 +1,21 @@ +# Generated by Django 4.1.7 on 2023-09-29 05:26 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("vulnerabilities", "0041_remove_vulns_with_empty_aliases"), + ] + + operations = [ + migrations.AddField( + model_name="vulnerability", + name="status", + field=models.IntegerField( + choices=[(1, "published"), (2, "disputed"), (3, "invalid")], + default=1, + ), + ), + ] diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index 7b6c9fcc6..0a724031f 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -11,16 +11,19 @@ import json import logging from contextlib import suppress +from typing import Any from cwe2.database import Database from django.contrib.auth import get_user_model from 
django.contrib.auth.models import UserManager from django.core import exceptions from django.core.exceptions import ValidationError +from django.core.paginator import Paginator from django.core.validators import MaxValueValidator from django.core.validators import MinValueValidator from django.db import models from django.db.models import Count +from django.db.models import Prefetch from django.db.models import Q from django.db.models.functions import Length from django.db.models.functions import Trim @@ -30,11 +33,9 @@ from packageurl.contrib.django.models import PackageURLQuerySet from packageurl.contrib.django.models import without_empty_values from rest_framework.authtoken.models import Token +from univers import versions +from univers.version_range import RANGE_CLASS_BY_SCHEMES -from vulnerabilities.importer import AdvisoryData -from vulnerabilities.importer import AffectedPackage -from vulnerabilities.importer import Reference -from vulnerabilities.improver import MAX_CONFIDENCE from vulnerabilities.severity_systems import SCORING_SYSTEMS from vulnerabilities.utils import build_vcid from vulnerabilities.utils import remove_qualifiers_and_subpath @@ -53,8 +54,28 @@ def get_or_none(self, *args, **kwargs): with suppress(self.model.DoesNotExist, ValidationError): return self.get(*args, **kwargs) + def paginated(self, per_page=5000): + """ + Iterate over a (large) QuerySet by chunks of ``per_page`` items. 
+ This technique is essential for preventing memory issues when iterating + See these links for inspiration: + https://nextlinklabs.com/resources/insights/django-big-data-iteration + https://stackoverflow.com/questions/4222176/why-is-iterating-through-a-large-django-queryset-consuming-massive-amounts-of-me/ + """ + paginator = Paginator(self, per_page=per_page) + for page_number in paginator.page_range: + page = paginator.page(page_number) + for object in page.object_list: + yield object + class VulnerabilityQuerySet(BaseQuerySet): + def affecting_vulnerabilities(self): + """ + Return a queryset of Vulnerability that affect a package. + """ + return self.filter(packagerelatedvulnerability__fix=False) + def with_cpes(self): """ Return a queryset of Vulnerability that have one or more NVD CPE references. @@ -137,6 +158,14 @@ def with_package_counts(self): ) +class VulnerabilityStatusType(models.IntegerChoices): + """List of vulnerability statuses.""" + + PUBLISHED = 1, "Published" + DISPUTED = 2, "Disputed" + INVALID = 3, "Invalid" + + class Vulnerability(models.Model): """ A software vulnerability with a unique identifier and alternate ``aliases``. @@ -165,6 +194,10 @@ class Vulnerability(models.Model): through="PackageRelatedVulnerability", ) + status = models.IntegerField( + choices=VulnerabilityStatusType.choices, default=VulnerabilityStatusType.PUBLISHED + ) + objects = VulnerabilityQuerySet.as_manager() class Meta: @@ -214,6 +247,11 @@ def get_aliases(self): alias = get_aliases + @property + def get_status_label(self): + label_by_status = {choice[0]: choice[1] for choice in VulnerabilityStatusType.choices} + return label_by_status.get(self.status) or VulnerabilityStatusType.PUBLISHED.label + def get_absolute_url(self): """ Return this Vulnerability details absolute URL. 
@@ -260,17 +298,26 @@ class Weakness(models.Model): vulnerabilities = models.ManyToManyField(Vulnerability, related_name="weaknesses") db = Database() + @property + def weakness(self): + """ + Return a queryset of Weakness for this vulnerability. + """ + try: + weakness = self.db.get(self.cwe_id) + return weakness + except Exception as e: + logger.warning(f"Could not find CWE {self.cwe_id}: {e}") + @property def name(self): """Return the weakness's name.""" - weakness = self.db.get(self.cwe_id) - return weakness.name + return self.weakness.name if self.weakness else "" @property def description(self): """Return the weakness's description.""" - weakness = self.db.get(self.cwe_id) - return weakness.description + return self.weakness.description if self.weakness else "" class VulnerabilityReferenceQuerySet(BaseQuerySet): @@ -366,6 +413,24 @@ def purl_to_dict(purl: PackageURL): class PackageQuerySet(BaseQuerySet, PackageURLQuerySet): + def get_fixed_by_package_versions(self, purl: PackageURL, fix=True): + """ + Return a queryset of all the package versions of this `package` that fix any vulnerability. + If `fix` is False, return all package versions whether or not they fix a vulnerability. + """ + filter_dict = { + "name": purl.name, + "namespace": purl.namespace, + "type": purl.type, + "qualifiers": purl.qualifiers, + "subpath": purl.subpath, + } + + if fix: + filter_dict["packagerelatedvulnerability__fix"] = True + + return Package.objects.filter(**filter_dict).distinct() + def get_or_create_from_purl(self, purl: PackageURL): """ Return an existing or new Package (created if neeed) given a @@ -563,7 +628,6 @@ def __str__(self): return self.package_url @property - # TODO: consider renaming to "affected_by" def affected_by(self): """ Return a queryset of vulnerabilities affecting this package. 
@@ -604,6 +668,144 @@ def get_absolute_url(self): """ return reverse("package_details", args=[self.purl]) + def sort_by_version(self, packages): + """ + Return a list of `packages` sorted by version. + """ + if not packages: + return [] + + return sorted( + packages, + key=lambda x: self.version_class(x.version), + ) + + @property + def version_class(self): + return RANGE_CLASS_BY_SCHEMES[self.type].version_class + + @property + def current_version(self): + return self.version_class(self.version) + + @property + def fixed_package_details(self): + """ + Return a mapping of vulnerabilities that affect this package and the next and + latest non-vulnerable versions. + """ + package_details = {} + package_details["purl"] = PackageURL.from_string(self.purl) + + next_non_vulnerable, latest_non_vulnerable = self.get_non_vulnerable_versions() + package_details["next_non_vulnerable"] = next_non_vulnerable + package_details["latest_non_vulnerable"] = latest_non_vulnerable + + package_details["vulnerabilities"] = self.get_affecting_vulnerabilities() + + return package_details + + def get_non_vulnerable_versions(self): + """ + Return a tuple of the next and latest non-vulnerable versions as PackageURLs. Return a tuple of + (None, None) if there is no non-vulnerable version. 
+ """ + package_versions = Package.objects.get_fixed_by_package_versions(self, fix=False) + + non_vulnerable_versions = [] + for version in package_versions: + if not version.is_vulnerable: + non_vulnerable_versions.append(version) + + later_non_vulnerable_versions = [] + for non_vuln_ver in non_vulnerable_versions: + if self.version_class(non_vuln_ver.version) > self.current_version: + later_non_vulnerable_versions.append(non_vuln_ver) + + if later_non_vulnerable_versions: + sorted_versions = self.sort_by_version(later_non_vulnerable_versions) + next_non_vulnerable_version = sorted_versions[0] + latest_non_vulnerable_version = sorted_versions[-1] + + next_non_vulnerable = PackageURL.from_string(next_non_vulnerable_version.purl) + latest_non_vulnerable = PackageURL.from_string(latest_non_vulnerable_version.purl) + + return next_non_vulnerable, latest_non_vulnerable + + return None, None + + def get_affecting_vulnerabilities(self): + """ + Return a list of vulnerabilities that affect this package together with information regarding + the versions that fix the vulnerabilities. 
+ """ + package_details_vulns = [] + + fixed_by_packages = Package.objects.get_fixed_by_package_versions(self, fix=True) + + package_vulnerabilities = self.vulnerabilities.affecting_vulnerabilities().prefetch_related( + Prefetch( + "packages", + queryset=fixed_by_packages, + to_attr="fixed_packages", + ) + ) + + for vuln in package_vulnerabilities: + package_details_vulns.append({"vulnerability": vuln}) + later_fixed_packages = [] + + for fixed_pkg in vuln.fixed_packages: + if fixed_pkg not in fixed_by_packages: + continue + fixed_version = self.version_class(fixed_pkg.version) + if fixed_version > self.current_version: + later_fixed_packages.append(fixed_pkg) + + next_fixed_package = None + next_fixed_package_vulns = [] + + sort_fixed_by_packages_by_version = [] + if later_fixed_packages: + sort_fixed_by_packages_by_version = self.sort_by_version(later_fixed_packages) + + fixed_by_pkgs = [] + + for vuln_details in package_details_vulns: + if vuln_details["vulnerability"] != vuln: + continue + vuln_details["fixed_by_purl"] = [] + vuln_details["fixed_by_purl_vulnerabilities"] = [] + + for fixed_by_pkg in sort_fixed_by_packages_by_version: + fixed_by_package_details = {} + fixed_by_purl = PackageURL.from_string(fixed_by_pkg.purl) + next_fixed_package_vulns = list(fixed_by_pkg.affected_by) + + fixed_by_package_details["fixed_by_purl"] = fixed_by_purl + fixed_by_package_details[ + "fixed_by_purl_vulnerabilities" + ] = next_fixed_package_vulns + fixed_by_pkgs.append(fixed_by_package_details) + + vuln_details["fixed_by_package_details"] = fixed_by_pkgs + + return package_details_vulns + + @property + def fixing_vulnerabilities(self): + """ + Return a queryset of Vulnerabilities that are fixed by this `package`. + """ + return self.vulnerabilities.filter(packagerelatedvulnerability__fix=True) + + @property + def affecting_vulnerabilities(self): + """ + Return a queryset of Vulnerabilities that affect this `package`. 
+ """ + return self.vulnerabilities.filter(packagerelatedvulnerability__fix=False) + class PackageRelatedVulnerability(models.Model): """ @@ -628,6 +830,7 @@ class PackageRelatedVulnerability(models.Model): "module name responsible for creating this relation. Eg:" "vulnerabilities.importers.nginx.NginxBasicImprover", ) + from vulnerabilities.improver import MAX_CONFIDENCE confidence = models.PositiveIntegerField( default=MAX_CONFIDENCE, @@ -735,6 +938,8 @@ class Alias(models.Model): alias = models.CharField( max_length=50, unique=True, + blank=False, + null=False, help_text="An alias is a unique vulnerability identifier in some database, " "such as CVE-2020-2233", ) @@ -770,6 +975,10 @@ def url(self): return f"https://github.com/nodejs/security-wg/blob/main/vuln/npm/{id}.json" +class AdvisoryQuerySet(BaseQuerySet): + pass + + class Advisory(models.Model): """ An advisory represents data directly obtained from upstream transformed @@ -798,10 +1007,8 @@ class Advisory(models.Model): ) weaknesses = models.JSONField(blank=True, default=list, help_text="A list of CWE ids") date_collected = models.DateTimeField(help_text="UTC Date on which the advisory was collected") - date_improved = models.DateTimeField( - blank=True, - null=True, - help_text="Latest date on which the advisory was improved by an improver", + date_imported = models.DateTimeField( + blank=True, null=True, help_text="UTC Date on which the advisory was imported" ) created_by = models.CharField( max_length=100, @@ -809,6 +1016,7 @@ class Advisory(models.Model): "module name importing the advisory. 
Eg:" "vulnerabilities.importers.nginx.NginxImporter", ) + objects = AdvisoryQuerySet.as_manager() class Meta: unique_together = ["aliases", "unique_content_id", "date_published"] @@ -816,13 +1024,17 @@ class Meta: def save(self, *args, **kwargs): checksum = hashlib.md5() - for field in (self.summary, self.affected_packages, self.references): + for field in (self.summary, self.affected_packages, self.references, self.weaknesses): value = json.dumps(field, separators=(",", ":")).encode("utf-8") checksum.update(value) self.unique_content_id = checksum.hexdigest() super().save(*args, **kwargs) - def to_advisory_data(self) -> AdvisoryData: + def to_advisory_data(self) -> "AdvisoryData": + from vulnerabilities.importer import AdvisoryData + from vulnerabilities.importer import AffectedPackage + from vulnerabilities.importer import Reference + return AdvisoryData( aliases=self.aliases, summary=self.summary, diff --git a/vulnerabilities/package_managers.py b/vulnerabilities/package_managers.py index f210f0f98..efce7ec1b 100644 --- a/vulnerabilities/package_managers.py +++ b/vulnerabilities/package_managers.py @@ -556,6 +556,22 @@ def fetch(self, pkg: str) -> Iterable[PackageVersion]: ) +class ConanVersionAPI(VersionAPI): + """ + Fetch versions of ``conan`` packages from the Conan API + """ + + package_type = "conan" + + def fetch(self, pkg: str) -> Iterable[PackageVersion]: + response = get_response( + url=f"https://conan.io/center/api/ui/details?name={pkg}&user=_&channel=_", + content_type="json", + ) + for release in response["versions"]: + yield PackageVersion(value=release["version"]) + + class GoproxyVersionAPI(VersionAPI): """ Fetch versions of Go "golang" packages from the Go proxy API @@ -688,10 +704,18 @@ def fetch(self, pkg: str) -> Iterable[PackageVersion]: CratesVersionAPI, DebianVersionAPI, GitHubTagsAPI, + ConanVersionAPI, } VERSION_API_CLASSES_BY_PACKAGE_TYPE = {cls.package_type: cls for cls in VERSION_API_CLASSES} 
+VERSION_API_CLASSES_BY_PACKAGE_TYPE["apache"] = GitHubTagsAPI + +VERSION_API_CLASS_BY_PACKAGE_NAMESPACE = { + "debian": DebianVersionAPI, + "ubuntu": LaunchpadVersionAPI, +} + def get_api_package_name(purl: PackageURL) -> str: """ @@ -703,11 +727,21 @@ def get_api_package_name(purl: PackageURL) -> str: """ if not purl.name: return None + if purl.type == "apache": + return f"{purl.type}/{purl.name}" if purl.type in ("nuget", "pypi", "gem", "deb") or not purl.namespace: return purl.name if purl.type == "maven": return f"{purl.namespace}:{purl.name}" - if purl.type in ("composer", "golang", "npm"): + if purl.type in ("composer", "golang", "npm", "github"): return f"{purl.namespace}/{purl.name}" logger.error(f"get_api_package_name: Unknown PURL {purl!r}") + + +def get_version_fetcher(package_url): + if package_url.type == "deb": + versions_fetcher: VersionAPI = VERSION_API_CLASS_BY_PACKAGE_NAMESPACE[package_url.namespace] + else: + versions_fetcher: VersionAPI = VERSION_API_CLASSES_BY_PACKAGE_TYPE[package_url.type] + return versions_fetcher diff --git a/vulnerabilities/templates/includes/pagination.html b/vulnerabilities/templates/includes/pagination.html index 85020e4d6..0d6dad430 100644 --- a/vulnerabilities/templates/includes/pagination.html +++ b/vulnerabilities/templates/includes/pagination.html @@ -1,12 +1,12 @@