Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

QA-Engine: Persist daily reports to GCS #22662

Merged
merged 7 commits into from
Feb 9, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions .github/workflows/run-qa-engine.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ name: Run QA Engine

on:
workflow_dispatch:
# schedule:
## 1pm UTC is 6am PDT.
## same time as Generate Build Report
# - cron: "0 13 * * *"
schedule:
# 1pm UTC is 6am PDT.
# same time as Generate Build Report
- cron: "0 13 * * *"
Comment on lines +5 to +8
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍 turn it on!


jobs:
run-qa-engine:
Expand All @@ -32,3 +32,4 @@ jobs:
QA_ENGINE_AIRBYTE_DATA_PROD_SA: "${{ secrets.QA_ENGINE_AIRBYTE_DATA_PROD_SA }}"
GITHUB_API_TOKEN: ${{ secrets.GH_PAT_MAINTENANCE_OCTAVIA }}
run: run-qa-engine
# run: run-qa-engine --create-prs
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: could use a TODO or a NOTE on when to uncomment and who owns it

Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
"b76be0a6-27dc-4560-95f6-2623da0bd7b6", # Local SQL Lite
]

GCS_QA_REPORT_PATH = "gs://prod-airbyte-cloud-connector-metadata-service/qa_report.json"
GCS_QA_REPORT_PATH = "gs://airbyte-data-connectors-qa-engine/"
evantahler marked this conversation as resolved.
Show resolved Hide resolved
AIRBYTE_PLATFORM_INTERNAL_REPO_OWNER = "airbytehq"
AIRBYTE_PLATFORM_INTERNAL_REPO_NAME = "airbyte-platform-internal"
AIRBYTE_PLATFORM_INTERNAL_GITHUB_REPO_URL = (
Expand Down
21 changes: 14 additions & 7 deletions tools/ci_connector_ops/ci_connector_ops/qa_engine/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,19 @@

import logging

from . import cloud_availability_updater, enrichments, inputs, validations
from .constants import CLOUD_CATALOG_URL, OSS_CATALOG_URL
import click

from . import cloud_availability_updater, enrichments, inputs, outputs, validations
from .constants import CLOUD_CATALOG_URL, GCS_QA_REPORT_PATH, OSS_CATALOG_URL

logging.basicConfig(level=logging.INFO)

logger = logging.getLogger(__name__)


def main():
@click.command()
@click.option("--create-prs", is_flag=True)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Very nice

def main(create_prs):
logger.info("Fetch the OSS connectors catalog.")
oss_catalog = inputs.fetch_remote_catalog(OSS_CATALOG_URL)
logger.info("Fetch the Cloud connectors catalog.")
Expand All @@ -23,7 +27,10 @@ def main():
enriched_catalog = enrichments.get_enriched_catalog(oss_catalog, cloud_catalog, adoption_metrics_per_connector_version)
logger.info("Start the QA report generation.")
qa_report = validations.get_qa_report(enriched_catalog, len(oss_catalog))
logger.info("Start the QA report generation.")
eligible_connectors = validations.get_connectors_eligible_for_cloud(qa_report)
logger.info("Start eligible connectors deployment to Cloud.")
cloud_availability_updater.deploy_eligible_connectors_to_cloud_repo(eligible_connectors)
logger.info("Persist QA report to GCS")
outputs.persist_qa_report(qa_report, GCS_QA_REPORT_PATH, public_fields_only=False)

if create_prs:
logger.info("Start eligible connectors deployment to Cloud.")
eligible_connectors = validations.get_connectors_eligible_for_cloud(qa_report)
cloud_availability_updater.deploy_eligible_connectors_to_cloud_repo(eligible_connectors)
13 changes: 9 additions & 4 deletions tools/ci_connector_ops/ci_connector_ops/qa_engine/outputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,18 @@
#


from datetime import datetime

import pandas as pd

from .models import ConnectorQAReport

def persist_qa_report(qa_report: pd.DataFrame, path: str, public_fields_only: bool =True):

def persist_qa_report(qa_report: pd.DataFrame, path: str, public_fields_only: bool = True) -> str:
report_generation_date = datetime.strftime(qa_report["report_generation_datetime"].max(), "%Y%m%d")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is report_generation_datetime different for each row in the qa_report?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No its the same value on each row for a report.

path = path + f"{report_generation_date}_qa_report.jsonl"
final_fields = [
field.name for field in ConnectorQAReport.__fields__.values()
if field.field_info.extra["is_public"] or not public_fields_only
field.name for field in ConnectorQAReport.__fields__.values() if field.field_info.extra["is_public"] or not public_fields_only
]
qa_report[final_fields].to_json(path, orient="records")
qa_report[final_fields].to_json(path, orient="records", lines=True)
return path
1 change: 1 addition & 0 deletions tools/ci_connector_ops/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from setuptools import find_packages, setup

MAIN_REQUIREMENTS = [
"click~=8.1.3",
"requests",
"PyYAML~=6.0",
"GitPython~=3.1.29",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright (c) 2022 Airbyte, Inc., all rights reserved.
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#


Expand Down Expand Up @@ -167,11 +167,11 @@ def test_create_pr(mocker, pr_already_created):
response = cloud_availability_updater.create_pr(connector, "my_awesome_branch")
expected_url = "https://api.github.com/repos/airbytehq/airbyte-platform-internal/pulls"
expected_body = f"""The Cloud Availability Updater decided that it's the right time to make {connector.connector_name} available on Cloud!
Technical name: {connector.connector_technical_name}
Version: {connector.connector_version}
Definition ID: {connector.connector_definition_id}
OSS sync success rate: {connector.sync_success_rate}
OSS number of connections: {connector.number_of_connections}
- Technical name: {connector.connector_technical_name}
- Version: {connector.connector_version}
- Definition ID: {connector.connector_definition_id}
- OSS sync success rate: {connector.sync_success_rate}
- OSS number of connections: {connector.number_of_connections}
"""
expected_data = {
"title": "🤖 Add source-foobar to cloud",
Expand Down
25 changes: 19 additions & 6 deletions tools/ci_connector_ops/tests/test_qa_engine/test_main.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
#
# Copyright (c) 2022 Airbyte, Inc., all rights reserved.
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#


import pytest
from ci_connector_ops.qa_engine import main
from click.testing import CliRunner


def test_main(mocker, dummy_qa_report):
@pytest.mark.parametrize("create_prs", [False, True])
def test_main(mocker, dummy_qa_report, create_prs):
runner = CliRunner()

mock_oss_catalog = mocker.Mock(__len__=mocker.Mock(return_value=42))
mock_cloud_catalog = mocker.Mock()

Expand All @@ -16,16 +21,24 @@ def test_main(mocker, dummy_qa_report):
mocker.patch.object(main.validations, "get_qa_report", mocker.Mock(return_value=dummy_qa_report))
mocker.patch.object(main.validations, "get_connectors_eligible_for_cloud")
mocker.patch.object(main.cloud_availability_updater, "deploy_eligible_connectors_to_cloud_repo")
mocker.patch.object(main.outputs, "persist_qa_report")

main.main()
if create_prs:
runner.invoke(main.main, ["--create-prs"])
else:
runner.invoke(main.main)

assert main.inputs.fetch_remote_catalog.call_count == 2
main.inputs.fetch_remote_catalog.assert_has_calls([mocker.call(main.OSS_CATALOG_URL), mocker.call(main.CLOUD_CATALOG_URL)])
main.enrichments.get_enriched_catalog.assert_called_with(
mock_oss_catalog, mock_cloud_catalog, main.inputs.fetch_adoption_metrics_per_connector_version.return_value
)
main.validations.get_qa_report.assert_called_with(main.enrichments.get_enriched_catalog.return_value, len(mock_oss_catalog))
main.validations.get_connectors_eligible_for_cloud.assert_called_with(main.validations.get_qa_report.return_value)
main.cloud_availability_updater.deploy_eligible_connectors_to_cloud_repo.assert_called_with(
main.validations.get_connectors_eligible_for_cloud.return_value
main.outputs.persist_qa_report.assert_called_with(
main.validations.get_qa_report.return_value, main.GCS_QA_REPORT_PATH, public_fields_only=False
)
if create_prs:
main.validations.get_connectors_eligible_for_cloud.assert_called_with(main.validations.get_qa_report.return_value)
main.cloud_availability_updater.deploy_eligible_connectors_to_cloud_repo.assert_called_with(
main.validations.get_connectors_eligible_for_cloud.return_value
)
14 changes: 5 additions & 9 deletions tools/ci_connector_ops/tests/test_qa_engine/test_outputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,14 @@

import pandas as pd
import pytest

from ci_connector_ops.qa_engine import outputs


@pytest.mark.parametrize("public_fields_only", [True, False])
def test_persist_qa_report_public_fields_only(tmp_path, dummy_qa_report, public_fields_only):
output_path = tmp_path / "qa_report.json"
outputs.persist_qa_report(dummy_qa_report, output_path, public_fields_only=public_fields_only)
qa_report_from_disk = pd.read_json(output_path)
private_fields = {
field.name for field in outputs.ConnectorQAReport.__fields__.values()
if not field.field_info.extra["is_public"]
}
def test_persist_qa_report(tmp_path, dummy_qa_report, public_fields_only):
output_path = outputs.persist_qa_report(dummy_qa_report, str(tmp_path), public_fields_only=public_fields_only)
qa_report_from_disk = pd.read_json(output_path, lines=True)
private_fields = {field.name for field in outputs.ConnectorQAReport.__fields__.values() if not field.field_info.extra["is_public"]}
available_fields = set(qa_report_from_disk.columns)
if public_fields_only:
assert not private_fields.issubset(available_fields)
Expand Down