Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Watch for packages (model and implementation) #244 #271

Merged
merged 20 commits into from
Jan 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 33 additions & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,19 @@ services:
volumes:
- db_data:/var/lib/postgresql/data/

redis:
image: redis
command: redis-server --appendonly yes
volumes:
- redis_data:/data
restart: always

web:
build: .
command: sh -c "
python manage.py migrate &&
python manage.py collectstatic --no-input --verbosity 0 --clear &&
gunicorn purldb.wsgi:application --bind :8000 --timeout 600 --workers 8"
gunicorn purldb_project.wsgi:application --bind :8000 --timeout 600 --workers 8"
env_file:
- docker.env
expose:
Expand Down Expand Up @@ -112,6 +119,30 @@ services:
depends_on:
- db
- web

scheduler:
build: .
command: wait-for-it web:8000 -- python manage.py run_scheduler
env_file:
- docker.env
volumes:
- /etc/purldb/:/etc/purldb/
depends_on:
- redis
- db
- web

rq_worker:
build: .
command: wait-for-it web:8000 -- python manage.py rqworker default
env_file:
- docker.env
volumes:
- /etc/purldb/:/etc/purldb/
depends_on:
- redis
- db
- web

nginx:
image: nginx
Expand All @@ -127,3 +158,4 @@ services:
volumes:
db_data:
static:
redis_data:
2 changes: 2 additions & 0 deletions docker.env
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,5 @@ POSTGRES_PASSWORD=packagedb
POSTGRES_INITDB_ARGS=--encoding=UTF-8 --lc-collate=en_US.UTF-8 --lc-ctype=en_US.UTF-8

PACKAGEDB_DB_HOST=db

PURLDB_REDIS_HOST=redis
91 changes: 69 additions & 22 deletions packagedb/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,39 +11,72 @@

import django_filters
from django.core.exceptions import ValidationError
from django.db.models import OuterRef, Q, Subquery
from django_filters.filters import Filter, OrderingFilter
from django.db.models import OuterRef
keshav-space marked this conversation as resolved.
Show resolved Hide resolved
from django.db.models import Q
from django.db.models import Subquery
from django_filters.filters import Filter
from django_filters.filters import OrderingFilter
from django_filters.rest_framework import FilterSet
from matchcode.api import MultipleCharFilter
from matchcode.api import MultipleCharInFilter

from minecode import priority_router
# UnusedImport here!
# But importing the mappers and visitors module triggers routes registration
from minecode import visitors # NOQA
from minecode.models import PriorityResourceURI
from minecode.models import ScannableURI
from minecode.route import NoRouteAvailable
from packagedb.filters import PackageSearchFilter
from packagedb.models import Package
from packagedb.models import PackageContentType
from packagedb.models import PackageSet
from packagedb.models import PackageWatch
from packagedb.models import Resource
from packagedb.package_managers import VERSION_API_CLASSES_BY_PACKAGE_TYPE
from packagedb.package_managers import get_api_package_name
from packagedb.package_managers import get_version_fetcher
from packagedb.serializers import DependentPackageSerializer
from packagedb.serializers import PackageAPISerializer
from packagedb.serializers import PackageSetAPISerializer
from packagedb.serializers import PackageWatchAPISerializer
from packagedb.serializers import PackageWatchCreateSerializer
from packagedb.serializers import PackageWatchUpdateSerializer
from packagedb.serializers import PartySerializer
from packagedb.serializers import PurlValidateResponseSerializer
from packagedb.serializers import PurlValidateSerializer
from packagedb.serializers import ResourceAPISerializer
from packagedb.throttling import StaffUserRateThrottle
from packageurl import PackageURL
from packageurl.contrib.django.utils import purl_to_lookups
from rest_framework import status, viewsets
from rest_framework import mixins
from rest_framework import status
from rest_framework import viewsets
from rest_framework.decorators import action
from rest_framework.response import Response
from rest_framework.throttling import AnonRateThrottle
from univers.version_constraint import InvalidConstraintsError
from univers.version_range import RANGE_CLASS_BY_SCHEMES, VersionRange
from univers.version_range import RANGE_CLASS_BY_SCHEMES
from univers.version_range import VersionRange
from univers.versions import InvalidVersion

from matchcode.api import MultipleCharFilter, MultipleCharInFilter
# UnusedImport here!
# But importing the mappers and visitors module triggers routes registration
from minecode import visitors # NOQA
from minecode import priority_router
from minecode.models import PriorityResourceURI, ScannableURI
from minecode.route import NoRouteAvailable
from packagedb.filters import PackageSearchFilter
from packagedb.models import Package, PackageContentType, PackageSet, Resource
from packagedb.package_managers import (VERSION_API_CLASSES_BY_PACKAGE_TYPE,
get_api_package_name,
get_version_fetcher)
from packagedb.serializers import (DependentPackageSerializer,
PackageAPISerializer,
PackageSetAPISerializer, PartySerializer, PurlValidateResponseSerializer, PurlValidateSerializer,
ResourceAPISerializer)
from packagedb.throttling import StaffUserRateThrottle
logger = logging.getLogger(__name__)


logger = logging.getLogger(__name__)
class CreateListRetrieveUpdateViewSet(
    mixins.CreateModelMixin,
    mixins.ListModelMixin,
    mixins.RetrieveModelMixin,
    mixins.UpdateModelMixin,
    viewsets.GenericViewSet,
):
    """
    Generic viewset exposing the `create`, `list`, `retrieve`, and `update`
    actions. No destroy mixin is included, so deletion is not offered.

    Subclass it and set the `.queryset` and `.serializer_class` attributes.
    """


class PackageResourcePurlFilter(Filter):
def filter(self, qs, value):
Expand Down Expand Up @@ -517,6 +550,20 @@ class PackageSetViewSet(viewsets.ReadOnlyModelViewSet):
serializer_class = PackageSetAPISerializer


class PackageWatchViewSet(CreateListRetrieveUpdateViewSet):
    """
    Create, list, retrieve and update PackageWatch objects.

    Individual watches are looked up by their `package_url` rather than by
    primary key. The list is ordered from the most recently created id down.
    """
    queryset = PackageWatch.objects.get_queryset().order_by('-id')
    serializer_class = PackageWatchAPISerializer
    lookup_field = 'package_url'
    # Matches "pkg:<type>/<segment>[/<segment>...]". The allowed characters
    # exclude "@", "?" and "#", so a version, qualifiers or a subpath are not
    # part of the lookup value.
    lookup_value_regex = r'pkg:[a-zA-Z0-9_]+\/[a-zA-Z0-9_.-]+(?:\/[a-zA-Z0-9_.-]+)*'

    def get_serializer_class(self):
        """
        Return the serializer class matching the current action.

        `create` uses PackageWatchCreateSerializer. Both `update` (PUT) and
        `partial_update` (PATCH) use PackageWatchUpdateSerializer, so that a
        PATCH request is validated the same way as a PUT request instead of
        falling through to the read serializer. Every other action uses the
        default `serializer_class`.
        """
        if self.action == 'create':
            return PackageWatchCreateSerializer
        # UpdateModelMixin exposes two actions; both must share one serializer.
        if self.action in ('update', 'partial_update'):
            return PackageWatchUpdateSerializer
        return super().get_serializer_class()


class CollectViewSet(viewsets.ViewSet):
"""
Return Package data for the purl passed in the `purl` query parameter.
Expand Down
31 changes: 31 additions & 0 deletions packagedb/management/commands/run_scheduler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# PurlDB is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/nexB/purldb for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

from django_rq.management.commands import rqscheduler
from packagedb.models import PackageWatch
from packagedb.schedules import clear_zombie_watch_schedules
from packagedb.schedules import scheduled_job_exists


def init_watch_scheduled():
    """
    Ensure every active PackageWatch has a scheduled job.

    For each active watch whose `schedule_work_id` does not correspond to an
    existing scheduled job, create a new job and persist its id on the watch.
    """
    for active_watch in PackageWatch.objects.filter(is_active=True):
        if not scheduled_job_exists(active_watch.schedule_work_id):
            active_watch.schedule_work_id = active_watch.create_new_job()
            # Only the job id changed, so limit the write to that column.
            active_watch.save(update_fields=["schedule_work_id"])


class Command(rqscheduler.Command):
    """
    Scheduler command that prepares PackageWatch jobs before running the
    stock `rqscheduler` command.
    """

    def handle(self, *args, **kwargs):
        """
        Clear zombie watch schedules, (re)create jobs for active watches,
        then hand over to the parent command with the same arguments.
        """
        clear_zombie_watch_schedules()
        init_watch_scheduled()
        super().handle(*args, **kwargs)
133 changes: 133 additions & 0 deletions packagedb/migrations/0082_packagewatch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
# Generated by Django 4.2.6 on 2024-01-18 13:14

import django.core.validators
from django.db import migrations, models


class Migration(migrations.Migration):
    """Create the PackageWatch model."""

    # Must be applied after the migration that precedes it in the packagedb app.
    dependencies = [
        ("packagedb", "0081_apiuser"),
    ]

    operations = [
        migrations.CreateModel(
            name="PackageWatch",
            fields=[
                (
                    "id",
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                # The watched PURL; unique, so there is one watch per package.
                (
                    "package_url",
                    models.CharField(
                        db_index=True,
                        help_text="Package-URL to watch. If the PURL has a version, qualifiers or subpath, they are stripped and ignored.",
                        max_length=2048,
                        unique=True,
                    ),
                ),
                # Optional, indexed type / namespace / name columns.
                # NOTE(review): presumably the decomposed parts of package_url;
                # confirm against the model.
                (
                    "type",
                    models.CharField(
                        blank=True,
                        db_index=True,
                        help_text="A short code to identify the type of this package.",
                        max_length=16,
                        null=True,
                    ),
                ),
                (
                    "namespace",
                    models.CharField(
                        blank=True,
                        db_index=True,
                        help_text="Package name prefix, such as Maven groupid, Docker image owner, GitHub user or organization, etc.",
                        max_length=255,
                        null=True,
                    ),
                ),
                (
                    "name",
                    models.CharField(
                        blank=True,
                        db_index=True,
                        help_text="Name of the package.",
                        max_length=100,
                        null=True,
                    ),
                ),
                # Watches are active by default.
                (
                    "is_active",
                    models.BooleanField(
                        db_index=True,
                        default=True,
                        help_text="When set to True (Yes), this Package Watch is active. When set to False (No), this watch is inactive and not processed.",
                        null=True,
                    ),
                ),
                # Defaults to 3 ("Scan"), the highest of the three choices.
                (
                    "depth",
                    models.PositiveSmallIntegerField(
                        choices=[(1, "Version"), (2, "Metadata"), (3, "Scan")],
                        default=3,
                        help_text="Depth of data collection from listing versions up to a full scan.",
                    ),
                ),
                # Interval in days, validated to the 1..365 range; default 7.
                (
                    "watch_interval",
                    models.PositiveSmallIntegerField(
                        default=7,
                        help_text="Number of days to wait between watches of this package.",
                        validators=[
                            django.core.validators.MinValueValidator(
                                1, message="Interval must be at least 1 day."
                            ),
                            django.core.validators.MaxValueValidator(
                                365, message="Interval must be at most 365 days."
                            ),
                        ],
                    ),
                ),
                (
                    "creation_date",
                    models.DateTimeField(
                        auto_now_add=True,
                        help_text="Timestamp indicating when this watch object was created.",
                    ),
                ),
                (
                    "last_watch_date",
                    models.DateTimeField(
                        blank=True,
                        db_index=True,
                        help_text="Timestamp indicating when this PURL was last watched.",
                        null=True,
                    ),
                ),
                (
                    "watch_error",
                    models.TextField(
                        blank=True,
                        help_text="Watch error messages of the last watch, if any. When present this means the watch failed. This is reset on each new watch.",
                        null=True,
                    ),
                ),
                # Nullable but unique: no two watches may share a job id.
                (
                    "schedule_work_id",
                    models.CharField(
                        blank=True,
                        db_index=True,
                        help_text="Identifier used to manage the periodic watch job.",
                        max_length=255,
                        null=True,
                        unique=True,
                    ),
                ),
            ],
        ),
    ]
Loading
Loading