From a2e392e253d566ba59a5efd63b0073f68c2c33e3 Mon Sep 17 00:00:00 2001 From: tdruez <489057+tdruez@users.noreply.github.com> Date: Tue, 25 Jun 2024 15:41:37 +0400 Subject: [PATCH] Add a flush-projects management command for bulk deletion #1289 (#1291) Signed-off-by: tdruez --- CHANGELOG.rst | 4 + docs/command-line-interface.rst | 21 ++++ docs/faq.rst | 29 ++++++ .../management/commands/flush-projects.py | 97 +++++++++++++++++++ scanpipe/tests/test_commands.py | 22 +++++ 5 files changed, 173 insertions(+) create mode 100644 scanpipe/management/commands/flush-projects.py diff --git a/CHANGELOG.rst b/CHANGELOG.rst index fe9ca12fc..5ea067701 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -7,6 +7,10 @@ v34.6.4 (unreleased) - Add all "classify" plugin fields from scancode-toolkit on the CodebaseResource model. https://github.com/nexB/scancode.io/issues/1275 +- Add a ``flush-projects`` management command to delete all project data and their + related work directories created more than a specified number of days ago. + https://github.com/nexB/scancode.io/issues/1289 + v34.6.3 (2024-06-21) -------------------- diff --git a/docs/command-line-interface.rst b/docs/command-line-interface.rst index c78e01f2d..9e154a90c 100644 --- a/docs/command-line-interface.rst +++ b/docs/command-line-interface.rst @@ -298,6 +298,27 @@ Optional arguments: - ``--no-input`` Does not prompt the user for input of any kind. +.. _cli_flush_projects: + +`$ scanpipe flush-projects` +--------------------------- + +Delete all project data and their related work directories created more than a +specified number of days ago. + +Optional arguments: + +- ``--retain-days RETAIN_DAYS`` Specify the number of days to retain data. + All data older than this number of days will be deleted. + **Defaults to 0 (delete all data)**. 
+ + For example, to delete all projects created more than one week ago:: + + scanpipe flush-projects --retain-days 7 + +- ``--no-input`` Does not prompt the user for input of any kind. + + .. _cli_create_user: `$ scanpipe create-user ` diff --git a/docs/faq.rst b/docs/faq.rst index 97f2bd7f7..782de1b25 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -249,3 +249,32 @@ Note that only the HTTPS type of URL is supported:: A GitHub repository URL example:: https://github.com/username/repository.git + +How can I cleanup my ScanCode.io installation, removing all projects and related data? +-------------------------------------------------------------------------------------- + +You can use the :ref:`cli_flush_projects` command to perform bulk deletion of projects +and their associated data stored on disk:: + + $ scanpipe flush-projects + +**Confirmation will be required before deletion.** + +To automate this process, such as running it from a cron job, you can use the +``--no-input`` option to skip confirmation prompts. + +Additionally, you can retain specific projects and their data based on their +creation date using the ``--retain-days`` option. + +Here's an example of a crontab entry that runs daily and flushes all projects and +data older than 7 days:: + + @daily scanpipe flush-projects --retain-days 7 --no-input + +.. note:: If you are using Docker for running ScanCode.io, you can run the scanpipe + ``flush-projects`` command using:: + + docker compose run --rm web scanpipe flush-projects + + See :ref:`command_line_interface` chapter for more information about the scanpipe + command. 
diff --git a/scanpipe/management/commands/flush-projects.py b/scanpipe/management/commands/flush-projects.py
new file mode 100644
index 000000000..2ae07142d
--- /dev/null
+++ b/scanpipe/management/commands/flush-projects.py
@@ -0,0 +1,113 @@
+# SPDX-License-Identifier: Apache-2.0
+#
+# http://nexb.com and https://github.com/nexB/scancode.io
+# The ScanCode.io software is licensed under the Apache License version 2.0.
+# Data generated with ScanCode.io is provided as-is without warranties.
+# ScanCode is a trademark of nexB Inc.
+#
+# You may not use this software except in compliance with the License.
+# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+#
+# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES
+# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
+# ScanCode.io should be considered or used as legal advice. Consult an Attorney
+# for any legal advice.
+#
+# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
+# Visit https://github.com/nexB/scancode.io for support and download.
+
+import datetime
+
+from django.core.management.base import BaseCommand
+from django.core.management.base import CommandError
+from django.template.defaultfilters import pluralize
+from django.utils import timezone
+
+from scanpipe.models import Project
+
+
+class Command(BaseCommand):
+    """Bulk-delete projects, optionally keeping the most recently created ones."""
+
+    help = (
+        "Delete all project data and their related work directories created more than "
+        "a specified number of days ago."
+    )
+
+    def add_arguments(self, parser):
+        """Register the ``--retain-days`` and ``--no-input`` options."""
+        super().add_arguments(parser)
+        parser.add_argument(
+            "--retain-days",
+            type=int,
+            help=(
+                "Optional. Specify the number of days to retain data. "
+                "All data older than this number of days will be deleted. "
+                "Defaults to 0 (delete all data)."
+            ),
+            default=0,
+        )
+        parser.add_argument(
+            "--no-input",
+            action="store_false",
+            dest="interactive",
+            help="Do not prompt the user for input of any kind.",
+        )
+
+    def handle(self, *inputs, **options):
+        """
+        Delete the selected projects, asking for confirmation unless
+        ``--no-input`` was provided.
+
+        Raise a CommandError when ``--retain-days`` is negative.
+        """
+        verbosity = options["verbosity"]
+        retain_days = options["retain_days"]
+
+        # A negative value would move the cutoff date into the future and
+        # select projects that are supposed to be retained.
+        if retain_days < 0:
+            raise CommandError("--retain-days must be a positive integer or 0.")
+
+        projects = Project.objects.all()
+        if retain_days:
+            cutoff_date = timezone.now() - datetime.timedelta(days=retain_days)
+            projects = projects.filter(created_date__lt=cutoff_date)
+
+        projects_count = projects.count()
+        if projects_count == 0:
+            if verbosity > 0:
+                self.stdout.write("No projects to remove.")
+            # Return instead of calling sys.exit() so that call_command() callers
+            # are not terminated by a SystemExit.
+            return
+
+        if options["interactive"]:
+            confirm = input(
+                f"You have requested the deletion of {projects_count} "
+                f"project{pluralize(projects_count)}.\n"
+                "This will IRREVERSIBLY DESTROY all data related to those projects.\n"
+                "Are you sure you want to do this?\n"
+                "Type 'yes' to continue, or 'no' to cancel: "
+            )
+            if confirm != "yes":
+                if verbosity > 0:
+                    self.stdout.write("Flush cancelled.")
+                return
+
+        deletion_count = 0
+        for project in projects:
+            project.delete()
+            deletion_count += 1
+
+        if verbosity > 0:
+            msg = (
+                f"{deletion_count} project{pluralize(deletion_count)} and "
+                f"{pluralize(deletion_count, 'its,their')} related data have been "
+                f"removed."
+            )
+            self.stdout.write(msg, self.style.SUCCESS)
diff --git a/scanpipe/tests/test_commands.py b/scanpipe/tests/test_commands.py
index abd7a77e5..3b99ee2ab 100644
--- a/scanpipe/tests/test_commands.py
+++ b/scanpipe/tests/test_commands.py
@@ -611,6 +611,33 @@ def test_scanpipe_management_command_reset_project(self):
         self.assertEqual(1, len(Project.get_root_content(project.input_path)))
         self.assertEqual(0, len(Project.get_root_content(project.codebase_path)))
 
+    def test_scanpipe_management_command_flush_projects(self):
+        project1 = Project.objects.create(name="project1")
+        project2 = Project.objects.create(name="project2")
+        ten_days_ago = timezone.now() - datetime.timedelta(days=10)
+        project2.update(created_date=ten_days_ago)
+
+        out = StringIO()
+        options = ["--retain-days", 7, "--no-color", "--no-input"]
+        call_command("flush-projects", *options, stdout=out)
+        out_value = out.getvalue().strip()
+        expected = "1 project and its related data have been removed."
+        self.assertEqual(expected, out_value)
+        self.assertEqual(project1, Project.objects.get())
+
+        Project.objects.create(name="project2")
+        out = StringIO()
+        options = ["--no-color", "--no-input"]
+        call_command("flush-projects", *options, stdout=out)
+        out_value = out.getvalue().strip()
+        expected = "2 projects and their related data have been removed."
+        self.assertEqual(expected, out_value)
+
+        # Nothing left to delete: the command reports it and returns normally.
+        out = StringIO()
+        call_command("flush-projects", *options, stdout=out)
+        self.assertEqual("No projects to remove.", out.getvalue().strip())
+
     def test_scanpipe_management_command_create_user(self):
         out = StringIO()
 