From 226a7ad8317d9cb895436eb07600339231eafcb4 Mon Sep 17 00:00:00 2001 From: ziad Date: Sun, 14 Aug 2022 22:02:56 +0200 Subject: [PATCH 1/3] run importers in parallel Signed-off-by: ziad --- vulnerabilities/management/commands/import.py | 40 ++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/vulnerabilities/management/commands/import.py b/vulnerabilities/management/commands/import.py index 17c2140cc..42d904199 100644 --- a/vulnerabilities/management/commands/import.py +++ b/vulnerabilities/management/commands/import.py @@ -6,7 +6,7 @@ # See https://github.com/nexB/vulnerablecode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. # - +import threading import traceback from django.core.management.base import BaseCommand @@ -27,6 +27,10 @@ def add_arguments(self, parser): ) parser.add_argument("--all", action="store_true", help="Run all available importers") + parser.add_argument( + "--all_in_parallel", action="store_true", help="Run all available importers in parallel" + ) + parser.add_argument("sources", nargs="*", help="Fully qualified importer name to run") def handle(self, *args, **options): @@ -38,6 +42,10 @@ def handle(self, *args, **options): self.import_data(IMPORTERS_REGISTRY.values()) return + if options["all_in_parallel"]: + self.import_data_in_parallel(IMPORTERS_REGISTRY.values()) + return + sources = options["sources"] if not sources: raise CommandError('Please provide at least one importer to run or use "--all".') @@ -77,6 +85,36 @@ def import_data(self, importers): if failed_importers: raise CommandError(f"{len(failed_importers)} failed!: {','.join(failed_importers)}") + def import_data_in_parallel(self, importers): + failed_importers = [] + thread_list = [] + for importer in importers: + self.stdout.write(f"Importing data using {importer.qualified_name}") + try: + thread = threading.Thread( + target=ImportRunner(importer).run(), name=importer.qualified_name + ) + 
thread.start() + thread_list.append(thread) + except Exception: + failed_importers.append(importer.qualified_name) + traceback.print_exc() + self.stdout.write( + self.style.ERROR( + f"Failed to run importer {importer.qualified_name}. Continuing..." + ) + ) + for thread in thread_list: + thread.join() + + success_list = {value for value in IMPORTERS_REGISTRY if value not in failed_importers} + if success_list: + self.stdout.write( + self.style.SUCCESS(f"Successfully imported data using {success_list} ") + ) + if failed_importers: + raise CommandError(f"{len(failed_importers)} failed!: {','.join(failed_importers)}") + def validate_importers(sources): importers = [] From daa13744647d55b123005a311e09b9e5e858b25d Mon Sep 17 00:00:00 2001 From: ziad Date: Wed, 24 Aug 2022 17:59:57 +0200 Subject: [PATCH 2/3] test redhat importer performance by profiling Signed-off-by: ziad Signed-off-by: ziad --- vulnerabilities/management/commands/import.py | 35 ------------------- vulnerabilities/tests/test_performance.py | 33 +++++++++++++++++ 2 files changed, 33 insertions(+), 35 deletions(-) create mode 100644 vulnerabilities/tests/test_performance.py diff --git a/vulnerabilities/management/commands/import.py b/vulnerabilities/management/commands/import.py index 42d904199..ae74889e9 100644 --- a/vulnerabilities/management/commands/import.py +++ b/vulnerabilities/management/commands/import.py @@ -6,7 +6,6 @@ # See https://github.com/nexB/vulnerablecode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. 
# -import threading import traceback from django.core.management.base import BaseCommand @@ -42,10 +41,6 @@ def handle(self, *args, **options): self.import_data(IMPORTERS_REGISTRY.values()) return - if options["all_in_parallel"]: - self.import_data_in_parallel(IMPORTERS_REGISTRY.values()) - return - sources = options["sources"] if not sources: raise CommandError('Please provide at least one importer to run or use "--all".') @@ -85,36 +80,6 @@ def import_data(self, importers): if failed_importers: raise CommandError(f"{len(failed_importers)} failed!: {','.join(failed_importers)}") - def import_data_in_parallel(self, importers): - failed_importers = [] - thread_list = [] - for importer in importers: - self.stdout.write(f"Importing data using {importer.qualified_name}") - try: - thread = threading.Thread( - target=ImportRunner(importer).run(), name=importer.qualified_name - ) - thread.start() - thread_list.append(thread) - except Exception: - failed_importers.append(importer.qualified_name) - traceback.print_exc() - self.stdout.write( - self.style.ERROR( - f"Failed to run importer {importer.qualified_name}. Continuing..." - ) - ) - for thread in thread_list: - thread.join() - - success_list = {value for value in IMPORTERS_REGISTRY if value not in failed_importers} - if success_list: - self.stdout.write( - self.style.SUCCESS(f"Successfully imported data using {success_list} ") - ) - if failed_importers: - raise CommandError(f"{len(failed_importers)} failed!: {','.join(failed_importers)}") - def validate_importers(sources): importers = [] diff --git a/vulnerabilities/tests/test_performance.py b/vulnerabilities/tests/test_performance.py new file mode 100644 index 000000000..1dd376aee --- /dev/null +++ b/vulnerabilities/tests/test_performance.py @@ -0,0 +1,33 @@ +# +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. 
+# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# +import pytest + +# this import is used in the script +from vulnerabilities.importers import redhat + +script = """for i, data in enumerate(redhat.RedhatImporter().advisory_data()): + if i == 100: + break""" + + +@pytest.mark.skip("Use only for local profiling") +@pytest.mark.django_db +class TestImporter: + def test_redhat_importer_performance_profiling(self): + print_profiling_status(script, "redhat.txt") + + +def print_profiling_status(test_py, stats_file, top=50): + import cProfile as profile + import pstats + + profile.runctx(test_py, globals(), locals(), stats_file) + p = pstats.Stats(stats_file) + p.sort_stats("time").print_stats(top) From 1cbcd097b6b8f11bf4efdab65abf2c70627faf85 Mon Sep 17 00:00:00 2001 From: ziad Date: Tue, 6 Sep 2022 00:35:10 +0200 Subject: [PATCH 3/3] remove a all_in_parallel command Signed-off-by: ziad --- vulnerabilities/management/commands/import.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/vulnerabilities/management/commands/import.py b/vulnerabilities/management/commands/import.py index ae74889e9..386cbbcae 100644 --- a/vulnerabilities/management/commands/import.py +++ b/vulnerabilities/management/commands/import.py @@ -26,10 +26,6 @@ def add_arguments(self, parser): ) parser.add_argument("--all", action="store_true", help="Run all available importers") - parser.add_argument( - "--all_in_parallel", action="store_true", help="Run all available importers in parallel" - ) - parser.add_argument("sources", nargs="*", help="Fully qualified importer name to run") def handle(self, *args, **options):