Skip to content

Commit

Permalink
Remove duplicated changelogs (aboutcode-org#1400)
Browse files Browse the repository at this point in the history
* Remove duplicated changelogs

Signed-off-by: Tushar Goel <tushar.goel.dav@gmail.com>

* Fix migrations

Signed-off-by: Tushar Goel <tushar.goel.dav@gmail.com>

* Fix migrations

Signed-off-by: Tushar Goel <tushar.goel.dav@gmail.com>

---------

Signed-off-by: Tushar Goel <tushar.goel.dav@gmail.com>
  • Loading branch information
TG1999 committed Jul 19, 2024
1 parent 1291daa commit ee25c88
Show file tree
Hide file tree
Showing 4 changed files with 124 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# VulnerableCode is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/nexB/vulnerablecode for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

from django.db import migrations
from django.db import models


class Migration(migrations.Migration):
    """
    Data migration that removes duplicated ``PackageChangeLog`` and
    ``VulnerabilityChangeLog`` rows.

    Rows are treated as duplicates when they share the same ``actor_name``,
    ``action_type`` and ``source_url``. Within each duplicate group only the
    row that sorts first on ``software_version`` is kept; the rest are deleted.
    The reverse operation is a no-op: deleted rows are not restored.
    """

    def remove_duped_changelogs(apps, schema_editor):
        """
        Delete duplicated change log rows, keeping one row per duplicate group.

        ``apps`` is the historical app registry used to look up the models;
        ``schema_editor`` is accepted because ``RunPython`` passes it, but is
        not used here.
        """
        PackageChangeLog = apps.get_model("vulnerabilities", "PackageChangeLog")
        VulnerabilityChangeLog = apps.get_model("vulnerabilities", "VulnerabilityChangeLog")

        # Fields whose combined value defines a "duplicate" change log entry.
        duplicate_key_fields = ("actor_name", "action_type", "source_url")

        for model in (PackageChangeLog, VulnerabilityChangeLog):
            # One result per key combination that occurs more than once.
            duplicate_groups = (
                model.objects.values(*duplicate_key_fields)
                .annotate(count=models.Count("id"))
                .filter(count__gt=1)
            )

            # Only primary keys are collected: instantiating every duplicate
            # row as a model object is unnecessary and costly on large tables.
            ids_to_delete = set()

            for group in duplicate_groups:
                group_records = model.objects.filter(
                    actor_name=group["actor_name"],
                    action_type=group["action_type"],
                    source_url=group["source_url"],
                )

                # Keep the record with the older software version. The extra
                # "id" ordering makes the choice deterministic when several
                # rows share the same software_version.
                keep_id = (
                    group_records.order_by("software_version", "id")
                    .values_list("id", flat=True)
                    .first()
                )

                # Every other record of the group is scheduled for deletion.
                ids_to_delete.update(
                    group_records.exclude(id=keep_id).values_list("id", flat=True)
                )

            # A single delete per model, issued after all groups are inspected.
            model.objects.filter(id__in=list(ids_to_delete)).delete()

    dependencies = [
        ("vulnerabilities", "0054_alter_packagechangelog_software_version_and_more"),
    ]

    operations = [
        migrations.RunPython(remove_duped_changelogs, reverse_code=migrations.RunPython.noop),
    ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Generated by Django 4.1.13 on 2024-01-22 09:42

from django.db import migrations


class Migration(migrations.Migration):
    """
    Schema migration that sets ``unique_together`` on the
    ``packagechangelog`` and ``vulnerabilitychangelog`` models.
    """

    dependencies = [
        ("vulnerabilities", "0055_remove_changelogs_with_same_data_different_software_version"),
    ]

    # Both models receive the same four-field uniqueness rule, applied in the
    # order the model names are listed below.
    operations = [
        migrations.AlterUniqueTogether(
            name=model_name,
            unique_together={("action_time", "actor_name", "action_type", "source_url")},
        )
        for model_name in ("packagechangelog", "vulnerabilitychangelog")
    ]
1 change: 1 addition & 0 deletions vulnerabilities/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1145,6 +1145,7 @@ def get_iso_time(self):
class Meta:
# Declared abstract: this Meta belongs to a base model meant to be subclassed.
abstract = True
# Default ordering is by action_time, descending (note the leading "-").
ordering = ("-action_time",)
# No two rows may share the same combination of these four fields.
unique_together = ("action_time", "actor_name", "action_type", "source_url")


class VulnerabilityHistoryManager(models.Manager):
Expand Down
50 changes: 50 additions & 0 deletions vulnerabilities/tests/test_data_migrations.py
Original file line number Diff line number Diff line change
Expand Up @@ -610,3 +610,53 @@ def setUpBeforeMigration(self, apps):
def test_removal_of_duped_purls(self):
    """Exactly one ``Package`` row must exist when this test runs."""
    # NOTE(review): ``apps`` is a module-level name defined outside this view.
    package_model = apps.get_model("vulnerabilities", "Package")
    package_total = package_model.objects.count()
    assert package_total == 1


class TestRemoveDupedChangeLogWithSameData(TestMigrations):
    """
    Migrate from 0054 to 0055 and check that change logs sharing the same
    actor, action type and source URL are reduced to one row per model.
    """

    app_name = "vulnerabilities"
    migrate_from = "0054_alter_packagechangelog_software_version_and_more"
    migrate_to = "0055_remove_changelogs_with_same_data_different_software_version"

    def setUpBeforeMigration(self, apps):
        """
        Create two package change logs and two vulnerability change logs that
        are identical except for their ``software_version``.
        """
        package_log_model = apps.get_model("vulnerabilities", "PackageChangeLog")
        vulnerability_log_model = apps.get_model("vulnerabilities", "VulnerabilityChangeLog")
        package_model = apps.get_model("vulnerabilities", "Package")
        vulnerability_model = apps.get_model("vulnerabilities", "Vulnerability")

        package = package_model.objects.create(
            type="nginx",
            name="nginx",
            qualifiers={"os": "windows"},
        )
        vulnerability = vulnerability_model.objects.create(summary="NEW")

        # Values shared by every change log row created below.
        shared_fields = {
            "actor_name": "Nginx",
            "action_type": 1,
            "source_url": "test",
        }

        # Package change logs are inserted with version "1" first, then "2".
        for version in ("1", "2"):
            package_log_model.objects.create(
                software_version=version,
                package=package,
                related_vulnerability=vulnerability,
                **shared_fields,
            )

        # Vulnerability change logs are inserted in the opposite version order.
        for version in ("2", "1"):
            vulnerability_log_model.objects.create(
                software_version=version,
                vulnerability=vulnerability,
                **shared_fields,
            )

    def test_removal_of_changelog(self):
        """Each change log model must hold exactly one row after the migration."""
        # NOTE(review): ``apps`` here is a module-level name defined outside
        # this view, not the ``apps`` argument given to setUpBeforeMigration.
        for model_name in ("PackageChangeLog", "VulnerabilityChangeLog"):
            changelog_model = apps.get_model("vulnerabilities", model_name)
            assert changelog_model.objects.count() == 1

0 comments on commit ee25c88

Please sign in to comment.