Bundle analysis: create timeseries dataset upon upload #601

Merged · 5 commits · Jun 10, 2024
upload/tests/views/test_bundle_analysis.py (102 additions, 0 deletions)
@@ -2,6 +2,8 @@
import re
from unittest.mock import ANY, patch

import pytest
from django.test import override_settings
from django.urls import reverse
from rest_framework.test import APIClient

@@ -10,8 +12,10 @@
from core.tests.factories import CommitFactory, RepositoryFactory
from services.redis_configuration import get_redis_connection
from services.task import TaskService
from timeseries.models import Dataset, MeasurementName


@pytest.mark.django_db(databases={"default", "timeseries"})
def test_upload_bundle_analysis(db, client, mocker, mock_redis):
upload = mocker.patch.object(TaskService, "upload")
mock_sentry_metrics = mocker.patch(
@@ -96,6 +100,7 @@ def test_upload_bundle_analysis(db, client, mocker, mock_redis):
)


@pytest.mark.django_db(databases={"default", "timeseries"})
def test_upload_bundle_analysis_org_token(db, client, mocker, mock_redis):
upload = mocker.patch.object(TaskService, "upload")
create_presigned_put = mocker.patch(
@@ -120,6 +125,7 @@ def test_upload_bundle_analysis_org_token(db, client, mocker, mock_redis):
assert res.status_code == 201


@pytest.mark.django_db(databases={"default", "timeseries"})
def test_upload_bundle_analysis_existing_commit(db, client, mocker, mock_redis):
upload = mocker.patch.object(TaskService, "upload")
create_presigned_put = mocker.patch(
@@ -216,6 +222,7 @@ def test_upload_bundle_analysis_invalid_token(db, client, mocker, mock_redis):
assert not upload.called


@pytest.mark.django_db(databases={"default", "timeseries"})
@patch("upload.helpers.jwt.decode")
@patch("upload.helpers.PyJWKClient")
def test_upload_bundle_analysis_github_oidc_auth(
@@ -246,3 +253,98 @@ def test_upload_bundle_analysis_github_oidc_auth(
format="json",
)
assert res.status_code == 201


@pytest.mark.django_db(databases={"default", "timeseries"})
def test_upload_bundle_analysis_measurement_datasets_created(
Contributor:

Just wondering, is it possible to parametrize this and the other test with the pytest.mark.parametrize() decorator?

    @pytest.mark.parametrize("using_integration", [True, False])

(A sketch of what that could look like follows after the two tests.)

    db, client, mocker, mock_redis
):
    mocker.patch.object(TaskService, "upload")
    mocker.patch("upload.views.bundle_analysis.sentry_metrics.incr")
    mocker.patch(
        "services.archive.StorageService.create_presigned_put",
        return_value="test-presigned-put",
    )

    repository = RepositoryFactory.create()
    commit_sha = "6fd5b89357fc8cdf34d6197549ac7c6d7e5977ef"

    client = APIClient()
    client.credentials(HTTP_AUTHORIZATION=f"token {repository.upload_token}")

    res = client.post(
        reverse("upload-bundle-analysis"),
        {
            "commit": commit_sha,
            "slug": f"{repository.author.username}::::{repository.name}",
            "build": "test-build",
            "buildURL": "test-build-url",
            "job": "test-job",
            "service": "test-service",
        },
        format="json",
        headers={"User-Agent": "codecov-cli/0.4.7"},
    )
    assert res.status_code == 201

    supported_bundle_analysis_measurement_types = [
        MeasurementName.BUNDLE_ANALYSIS_ASSET_SIZE,
        MeasurementName.BUNDLE_ANALYSIS_FONT_SIZE,
        MeasurementName.BUNDLE_ANALYSIS_IMAGE_SIZE,
        MeasurementName.BUNDLE_ANALYSIS_JAVASCRIPT_SIZE,
        MeasurementName.BUNDLE_ANALYSIS_REPORT_SIZE,
        MeasurementName.BUNDLE_ANALYSIS_STYLESHEET_SIZE,
    ]
    for measurement_type in supported_bundle_analysis_measurement_types:
        assert Dataset.objects.filter(
            name=measurement_type.value,
            repository_id=repository.pk,
        ).exists()


@override_settings(TIMESERIES_ENABLED=False)
@pytest.mark.django_db(databases={"default", "timeseries"})
def test_upload_bundle_analysis_measurement_timeseries_disabled(
    db, client, mocker, mock_redis
):
    mocker.patch.object(TaskService, "upload")
    mocker.patch("upload.views.bundle_analysis.sentry_metrics.incr")
    mocker.patch(
        "services.archive.StorageService.create_presigned_put",
        return_value="test-presigned-put",
    )

    repository = RepositoryFactory.create()
    commit_sha = "6fd5b89357fc8cdf34d6197549ac7c6d7e5977ef"

    client = APIClient()
    client.credentials(HTTP_AUTHORIZATION=f"token {repository.upload_token}")

    res = client.post(
        reverse("upload-bundle-analysis"),
        {
            "commit": commit_sha,
            "slug": f"{repository.author.username}::::{repository.name}",
            "build": "test-build",
            "buildURL": "test-build-url",
            "job": "test-job",
            "service": "test-service",
        },
        format="json",
        headers={"User-Agent": "codecov-cli/0.4.7"},
    )
    assert res.status_code == 201

    supported_bundle_analysis_measurement_types = [
        MeasurementName.BUNDLE_ANALYSIS_ASSET_SIZE,
        MeasurementName.BUNDLE_ANALYSIS_FONT_SIZE,
        MeasurementName.BUNDLE_ANALYSIS_IMAGE_SIZE,
        MeasurementName.BUNDLE_ANALYSIS_JAVASCRIPT_SIZE,
        MeasurementName.BUNDLE_ANALYSIS_REPORT_SIZE,
        MeasurementName.BUNDLE_ANALYSIS_STYLESHEET_SIZE,
    ]
    for measurement_type in supported_bundle_analysis_measurement_types:
        assert not Dataset.objects.filter(
            name=measurement_type.value,
            repository_id=repository.pk,
        ).exists()
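
For reference, the reviewer's parametrize suggestion could look roughly like the sketch below. This is only a sketch: the parameter names (timeseries_enabled, should_exist) are illustrative, and it assumes pytest-django's settings fixture in place of the @override_settings decorator.

    import pytest
    from django.urls import reverse
    from rest_framework.test import APIClient

    from core.tests.factories import RepositoryFactory
    from services.task import TaskService
    from timeseries.models import Dataset, MeasurementName

    SUPPORTED_TYPES = [
        MeasurementName.BUNDLE_ANALYSIS_ASSET_SIZE,
        MeasurementName.BUNDLE_ANALYSIS_FONT_SIZE,
        MeasurementName.BUNDLE_ANALYSIS_IMAGE_SIZE,
        MeasurementName.BUNDLE_ANALYSIS_JAVASCRIPT_SIZE,
        MeasurementName.BUNDLE_ANALYSIS_REPORT_SIZE,
        MeasurementName.BUNDLE_ANALYSIS_STYLESHEET_SIZE,
    ]

    @pytest.mark.parametrize(
        "timeseries_enabled,should_exist",
        [(True, True), (False, False)],
    )
    @pytest.mark.django_db(databases={"default", "timeseries"})
    def test_upload_bundle_analysis_datasets(
        db, mocker, mock_redis, settings, timeseries_enabled, should_exist
    ):
        # Toggle the feature flag per case via pytest-django's settings fixture.
        settings.TIMESERIES_ENABLED = timeseries_enabled

        mocker.patch.object(TaskService, "upload")
        mocker.patch("upload.views.bundle_analysis.sentry_metrics.incr")
        mocker.patch(
            "services.archive.StorageService.create_presigned_put",
            return_value="test-presigned-put",
        )

        repository = RepositoryFactory.create()
        client = APIClient()
        client.credentials(HTTP_AUTHORIZATION=f"token {repository.upload_token}")

        res = client.post(
            reverse("upload-bundle-analysis"),
            {
                "commit": "6fd5b89357fc8cdf34d6197549ac7c6d7e5977ef",
                "slug": f"{repository.author.username}::::{repository.name}",
                "build": "test-build",
                "buildURL": "test-build-url",
                "job": "test-job",
                "service": "test-service",
            },
            format="json",
            headers={"User-Agent": "codecov-cli/0.4.7"},
        )
        assert res.status_code == 201

        # Datasets should exist exactly when the timeseries feature is enabled.
        for measurement_type in SUPPORTED_TYPES:
            assert (
                Dataset.objects.filter(
                    name=measurement_type.value,
                    repository_id=repository.pk,
                ).exists()
                == should_exist
            )

Collapsing the two tests this way keeps the request setup in one place and makes the enabled/disabled expectation explicit in the parameter list.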
upload/views/bundle_analysis.py (27 additions, 0 deletions)
@@ -1,6 +1,7 @@
import logging
import uuid

from django.conf import settings
from rest_framework import serializers, status
from rest_framework.exceptions import NotAuthenticated
from rest_framework.permissions import BasePermission
@@ -21,6 +22,7 @@
from reports.models import CommitReport
from services.archive import ArchiveService
from services.redis_configuration import get_redis_connection
from timeseries.models import Dataset, MeasurementName
from upload.helpers import dispatch_upload_task, generate_upload_sentry_metrics_tags
from upload.views.base import ShelterMixin
from upload.views.helpers import get_repository_from_string
@@ -142,4 +144,29 @@ def post(self, request):
is_shelter_request=self.is_shelter_request(),
),
)

if settings.TIMESERIES_ENABLED:
Contributor:

Do we want to jump right to creating, or create only if it doesn't already exist / isn't backfilled? The latter is what's done for coverage, I think:

    dataset = Dataset.objects.filter(
        name=MeasurementName.COVERAGE.value,
        repository_id=repository.pk,
    ).first()
    if settings.TIMESERIES_ENABLED and dataset and dataset.is_backfilled():
        # timeseries data is ready
        return coverage_measurements(

Contributor (author):

I think it's better to create it as soon as possible than to create it when we fetch the measurements; that way we don't miss a datapoint in the general upload -> process -> query flow, since the process step is when we insert the measurement datapoints, and that relies on the dataset rows already being present.

With the coverage measurements it's OK because, when querying without the dataset created, it can still compute and return measurements on the fly and then do a backfill. Also, with BA we don't plan on doing backfills, because the previous bundle schemas don't support it.
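
To make that ordering concrete, here is a minimal, hypothetical sketch of the dependency being described; can_record_datapoint is an illustrative helper, not the actual worker code:

    from timeseries.models import Dataset

    def can_record_datapoint(repository_id: int, measurement_name: str) -> bool:
        # The process step can only insert a measurement datapoint once the
        # corresponding Dataset row exists. Creating the rows at upload time
        # means the first upload's datapoints are not lost, and bundle
        # analysis has no backfill to recover them later.
        return Dataset.objects.filter(
            name=measurement_name,
            repository_id=repository_id,
        ).exists()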

Contributor:

Oh, on second read the code is get_or_create(); I thought it was just create(). My concern was that you would create multiple Datasets with the same type/repo ID, which sounded wrong, but this is all good.
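
For clarity on that point, get_or_create() is idempotent for a given (name, repository_id) pair. A minimal illustration, assuming a repository instance is in scope:

    from timeseries.models import Dataset, MeasurementName

    name = MeasurementName.BUNDLE_ANALYSIS_ASSET_SIZE.value

    first, created = Dataset.objects.get_or_create(
        name=name, repository_id=repository.pk
    )
    assert created  # first call inserts the row

    again, created = Dataset.objects.get_or_create(
        name=name, repository_id=repository.pk
    )
    assert not created            # second call fetches the existing row
    assert again.pk == first.pk   # no duplicate Dataset per type/repo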

    supported_bundle_analysis_measurement_types = [
        MeasurementName.BUNDLE_ANALYSIS_ASSET_SIZE,
        MeasurementName.BUNDLE_ANALYSIS_FONT_SIZE,
        MeasurementName.BUNDLE_ANALYSIS_IMAGE_SIZE,
        MeasurementName.BUNDLE_ANALYSIS_JAVASCRIPT_SIZE,
        MeasurementName.BUNDLE_ANALYSIS_REPORT_SIZE,
        MeasurementName.BUNDLE_ANALYSIS_STYLESHEET_SIZE,
    ]
    for measurement_type in supported_bundle_analysis_measurement_types:
        _, created = Dataset.objects.get_or_create(
            name=measurement_type.value,
            repository_id=repo.pk,
        )
        if created:
            log.info(
                "Created new timescale dataset for bundle analysis",
                extra=dict(
                    commit=commit.commitid,
                    repoid=repo.repoid,
                    measurement_type=measurement_type,
                ),
            )

return Response({"url": url}, status=201)