Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

362 update status task #375

Merged
merged 6 commits into from
Apr 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions docker-compose_purldb.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ services:
volumes:
- /etc/purldb/:/etc/purldb/
- static:/var/purldb/static/
- temp_data:/tmp/minecode/
depends_on:
- db

Expand Down Expand Up @@ -113,6 +114,7 @@ services:
- docker_purldb.env
volumes:
- /etc/purldb/:/etc/purldb/
- temp_data:/tmp/minecode/
depends_on:
- redis
- db
Expand Down Expand Up @@ -141,3 +143,4 @@ volumes:
db_data:
static:
redis_data:
temp_data:
4 changes: 2 additions & 2 deletions docker_purldb.env
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@ POSTGRES_PASSWORD=packagedb
POSTGRES_INITDB_ARGS=--encoding=UTF-8 --lc-collate=en_US.UTF-8 --lc-ctype=en_US.UTF-8

PACKAGEDB_DB_HOST=db

PURLDB_REDIS_HOST=redis
PURLDB_REDIS_HOST=redis
PURLDB_ASYNC=True
51 changes: 24 additions & 27 deletions minecode/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from minecode.models import PriorityResourceURI, ResourceURI, ScannableURI
from minecode.permissions import IsScanQueueWorkerAPIUser
from minecode.utils import validate_uuid
from minecode.utils import get_temp_file


class ResourceURISerializer(serializers.ModelSerializer):
Expand Down Expand Up @@ -202,33 +203,29 @@ def update_status(self, request, *args, **kwargs):
scan_results_file = request.data.get('scan_results_file')
scan_summary_file = request.data.get('scan_summary_file')
project_extra_data = request.data.get('project_extra_data')
scan_data = json.load(scan_results_file)
summary_data = json.load(scan_summary_file)
project_extra_data = json.loads(project_extra_data)

scannable_uri.scan_status = ScannableURI.SCAN_COMPLETED

indexing_errors = index_package(
scannable_uri,
scannable_uri.package,
scan_data,
summary_data,
project_extra_data,
reindex=scannable_uri.reindex_uri,

# Save results to temporary files
scan_results_location = get_temp_file(
file_name='scan_results',
extension='.json'
)
if indexing_errors:
scannable_uri.scan_status = ScannableURI.SCAN_INDEX_FAILED
scannable_uri.index_error = indexing_errors
msg = {
'error': f'scan index failed for scannable_uri {scannable_uri_uuid}'
}
return Response(msg, status=status.HTTP_400_BAD_REQUEST)
else:
scannable_uri.scan_status = ScannableURI.SCAN_INDEXED
msg = {
'status': f'scan indexed for scannable_uri {scannable_uri_uuid}'
}
scannable_uri.wip_date = None
scannable_uri.save()
scan_summary_location = get_temp_file(
file_name='scan_summary',
extension='.json'
)
with open(scan_results_location, 'wb') as f:
f.write(scan_results_file.read())
with open(scan_summary_location, 'wb') as f:
f.write(scan_summary_file.read())

scannable_uri.process_scan_results(
scan_results_location=scan_results_location,
scan_summary_location=scan_summary_location,
project_extra_data=project_extra_data
)
msg = {
'status': f'scan results for scannable_uri {scannable_uri_uuid} '
'have been queued for indexing'
}

return Response(msg)
32 changes: 31 additions & 1 deletion minecode/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,10 @@
import logging
import sys

from django.conf import settings
from django.db import models
from django.utils import timezone

import django_rq

from minecode import map_router
from minecode import visit_router
Expand Down Expand Up @@ -760,6 +761,35 @@ def save(self, *args, **kwargs):
self.normalize_fields()
super(ScannableURI, self).save(*args, **kwargs)

def process_scan_results(
    self,
    scan_results_location,
    scan_summary_location,
    project_extra_data
):
    """
    Mark this ScannableURI as SCAN_COMPLETED, save it, and hand the scan
    results over to `minecode.tasks.process_scan_results` for indexing.

    `scan_results_location` and `scan_summary_location` are the locations of
    the saved scan results and scan summary; they are forwarded, together
    with `project_extra_data` and this ScannableURI's UUID, to the task
    unchanged.

    When `settings.PURLDB_ASYNC` is true, the task is enqueued through
    django_rq and the resulting job is returned. Otherwise the task is run
    in-process and None is returned.
    """
    # Imported here rather than at module level, as in the original code
    # (presumably to avoid a circular import with minecode.tasks -- confirm).
    from minecode import tasks

    self.scan_status = self.SCAN_COMPLETED
    self.save()

    # Both execution modes receive the exact same keyword arguments.
    task_kwargs = dict(
        scannable_uri_uuid=self.uuid,
        scan_results_location=scan_results_location,
        scan_summary_location=scan_summary_location,
        project_extra_data=project_extra_data,
    )

    if settings.PURLDB_ASYNC:
        return django_rq.enqueue(tasks.process_scan_results, **task_kwargs)

    tasks.process_scan_results(**task_kwargs)
    return None


# TODO: Use the QuerySet.as_manager() for more flexibility and chaining.
class PriorityResourceURIManager(models.Manager):
Expand Down
64 changes: 64 additions & 0 deletions minecode/tasks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# purldb is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/nexB/purldb for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

import json

from commoncode.fileutils import delete

from minecode.management.indexing import index_package
from minecode.models import ScannableURI


def process_scan_results(
    scannable_uri_uuid,
    scan_results_location,
    scan_summary_location,
    project_extra_data,
):
    """
    Index the scan results from `scan_results_location`,
    `scan_summary_location`, and `project_extra_data` for the Package related
    to the ScannableURI with UUID `scannable_uri_uuid`.

    `scan_results_location` and `scan_summary_location` are paths to JSON
    files; `project_extra_data` is a JSON-encoded string.

    The ScannableURI `scan_status` is set to SCAN_INDEX_FAILED (and
    `index_error` is populated) when `index_package` reports errors, and to
    SCAN_INDEXED otherwise. `wip_date` is cleared in both cases before the
    ScannableURI is saved.

    `scan_results_location` and `scan_summary_location` are deleted after the
    indexing process has finished.

    Raise an Exception if there is no ScannableURI with UUID
    `scannable_uri_uuid`.
    """
    # Read the files as bytes and let `json` detect the encoding, so that
    # decoding does not depend on the locale of the process running the task.
    with open(scan_results_location, 'rb') as f:
        scan_data = json.load(f)
    with open(scan_summary_location, 'rb') as f:
        summary_data = json.load(f)
    project_extra_data = json.loads(project_extra_data)

    try:
        scannable_uri = ScannableURI.objects.get(uuid=scannable_uri_uuid)
    except ScannableURI.DoesNotExist as e:
        raise Exception(
            f'ScannableURI {scannable_uri_uuid} does not exist!'
        ) from e

    indexing_errors = index_package(
        scannable_uri,
        scannable_uri.package,
        scan_data,
        summary_data,
        project_extra_data,
        reindex=scannable_uri.reindex_uri,
    )

    if indexing_errors:
        scannable_uri.scan_status = ScannableURI.SCAN_INDEX_FAILED
        scannable_uri.index_error = indexing_errors
    else:
        scannable_uri.scan_status = ScannableURI.SCAN_INDEXED

    scannable_uri.wip_date = None
    scannable_uri.save()

    # Clean up after indexing has ended.
    # NOTE(review): the files are only removed when everything above
    # succeeded; they are left in place if loading or indexing raises.
    delete(scan_results_location)
    delete(scan_summary_location)
90 changes: 90 additions & 0 deletions minecode/tests/test_tasks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# purldb is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/nexB/purldb for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

import json
import os

from django.test import TestCase
from unittest import mock

from minecode.models import ScannableURI
from packagedb.models import Package
from minecode.utils_test import JsonBasedTesting
from minecode import tasks


class MinecodeTasksTestCase(JsonBasedTesting, TestCase):
    """
    Tests for `minecode.tasks.process_scan_results`.
    """
    test_data_dir = os.path.join(os.path.dirname(__file__), 'testfiles')

    def setUp(self):
        # A Package without checksums, size, license or copyright, and the
        # ScannableURI that points to it.
        self.package1 = Package.objects.create(
            download_url='https://test-url.com/package1.tar.gz',
            type='type1',
            name='name1',
            version='1.0',
        )
        self.scannable_uri1 = ScannableURI.objects.create(
            uri='https://test-url.com/package1.tar.gz',
            package=self.package1
        )
        # Values expected to be copied onto the Package by indexing.
        self.project_extra_data1 = {
            'md5': 'md5',
            'sha1': 'sha1',
            'sha256': 'sha256',
            'sha512': 'sha512',
            'size': 100,
        }

    # `os.remove` is patched because the task deletes the files it is given
    # once indexing is done, and the files passed here are test fixtures.
    @mock.patch('os.remove')
    def test_minecode_tasks_process_scan_results(self, mock_delete):
        mock_delete.side_effect = [None, None]

        # The Package starts out with none of the indexed fields populated.
        self.assertFalse(self.package1.md5)
        self.assertFalse(self.package1.sha1)
        self.assertFalse(self.package1.sha256)
        self.assertFalse(self.package1.sha512)
        self.assertFalse(self.package1.size)
        self.assertFalse(self.package1.declared_license_expression)
        self.assertFalse(self.package1.copyright)
        self.assertEqual(0, self.package1.resources.count())

        scan_file_location = self.get_test_loc('scancodeio/get_scan_data.json')
        summary_file_location = self.get_test_loc('scancodeio/scan_summary_response.json')
        project_extra_data = json.dumps(self.project_extra_data1)
        tasks.process_scan_results(
            self.scannable_uri1.uuid,
            scan_results_location=scan_file_location,
            scan_summary_location=summary_file_location,
            project_extra_data=project_extra_data,
        )

        # The task works on its own instances: reload ours before checking.
        self.package1.refresh_from_db()
        self.scannable_uri1.refresh_from_db()
        self.assertEqual('md5', self.package1.md5)
        self.assertEqual('sha1', self.package1.sha1)
        self.assertEqual('sha256', self.package1.sha256)
        self.assertEqual('sha512', self.package1.sha512)
        self.assertEqual(100, self.package1.size)
        self.assertEqual('apache-2.0', self.package1.declared_license_expression)
        self.assertEqual('Copyright (c) Apache Software Foundation', self.package1.copyright)
        self.assertFalse(self.scannable_uri1.scan_error)
        self.assertEqual(64, self.package1.resources.count())

    def test_minecode_tasks_process_scan_results_scannableuri_does_not_exist(self):
        # A UUID that does not match any ScannableURI created in setUp.
        nonexisting_uuid = '420db78a-625f-4622-b1a0-93d1ea853194'
        scan_file_location = self.get_test_loc('scancodeio/get_scan_data.json')
        summary_file_location = self.get_test_loc('scancodeio/scan_summary_response.json')
        project_extra_data = json.dumps(self.project_extra_data1)

        with self.assertRaises(Exception) as context:
            tasks.process_scan_results(
                nonexisting_uuid,
                scan_results_location=scan_file_location,
                scan_summary_location=summary_file_location,
                project_extra_data=project_extra_data,
            )
        expected_message = f'ScannableURI {nonexisting_uuid} does not exist!'
        self.assertIn(expected_message, str(context.exception))
6 changes: 6 additions & 0 deletions purldb_project/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,7 @@
'VERSION': PURLDB_VERSION,
'SERVE_INCLUDE_SCHEMA': False,
}

RQ_QUEUES = {
'default': {
"HOST": env.str("PURLDB_REDIS_HOST", default="localhost"),
Expand All @@ -316,3 +317,8 @@
"DEFAULT_TIMEOUT": env.int("PURLDB_REDIS_DEFAULT_TIMEOUT", default=360),
}
}

# Whether scan result processing is handed to an RQ queue (True) or run
# in-process (False, the default). Read from the PURLDB_ASYNC environment
# variable.
PURLDB_ASYNC = env.bool("PURLDB_ASYNC", default=False)
if not PURLDB_ASYNC:
    # Flag every configured queue as non-async so that django-rq does not
    # defer jobs to a worker.
    for queue_config in RQ_QUEUES.values():
        queue_config.update({"ASYNC": False})
Loading