Skip to content

Commit

Permalink
QA-Engine: Persist daily reports to GCS (#22662)
Browse files Browse the repository at this point in the history
  • Loading branch information
alafanechere authored Feb 9, 2023
1 parent 44a85a9 commit 9e8035c
Show file tree
Hide file tree
Showing 10 changed files with 158 additions and 144 deletions.
13 changes: 7 additions & 6 deletions .github/workflows/run-qa-engine.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ name: Run QA Engine

on:
workflow_dispatch:
# schedule:
## 1pm UTC is 6am PDT.
## same time as Generate Build Report
# - cron: "0 13 * * *"
schedule:
# 1pm UTC is 6am PDT.
# same time as Generate Build Report
- cron: "0 13 * * *"

jobs:
run-qa-engine:
Expand All @@ -18,7 +18,7 @@ jobs:
- name: Setup Cloud SDK
uses: google-github-actions/setup-gcloud@v0
with:
service_account_key: ${{ secrets.PROD_SPEC_CACHE_SA_KEY }}
service_account_key: ${{ secrets.QA_ENGINE_AIRBYTE_DATA_PROD_SA }}
export_default_credentials: true
- name: Install Python
uses: actions/setup-python@v4
Expand All @@ -29,6 +29,7 @@ jobs:
- name: Run QA Engine
env:
LOGLEVEL: INFO
QA_ENGINE_AIRBYTE_DATA_PROD_SA: "${{ secrets.QA_ENGINE_AIRBYTE_DATA_PROD_SA }}"
GITHUB_API_TOKEN: ${{ secrets.GH_PAT_MAINTENANCE_OCTAVIA }}
run: run-qa-engine
# TODO: enable PR creation when all the QA checks are implemented:
# run: run-qa-engine --create-prs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
"b76be0a6-27dc-4560-95f6-2623da0bd7b6", # Local SQL Lite
]

GCS_QA_REPORT_PATH = "gs://prod-airbyte-cloud-connector-metadata-service/qa_report.json"
GCS_QA_REPORT_PATH = "gs://airbyte-data-connectors-qa-engine/"
AIRBYTE_PLATFORM_INTERNAL_REPO_OWNER = "airbytehq"
AIRBYTE_PLATFORM_INTERNAL_REPO_NAME = "airbyte-platform-internal"
AIRBYTE_PLATFORM_INTERNAL_GITHUB_REPO_URL = (
Expand Down
46 changes: 23 additions & 23 deletions tools/ci_connector_ops/ci_connector_ops/qa_engine/inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,19 @@
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#

import os
from importlib.resources import files
import json
import logging
from enum import Enum
from importlib.resources import files
from typing import Optional

from .constants import CONNECTOR_BUILD_OUTPUT_URL

from google.oauth2 import service_account
import requests
import pandas as pd
from typing import Optional
import requests

from enum import Enum
from .constants import CONNECTOR_BUILD_OUTPUT_URL

LOGGER = logging.getLogger(__name__)


class BUILD_STATUSES(str, Enum):
SUCCESS = "success"
FAILURE = "failure"
Expand All @@ -31,15 +28,15 @@ def from_string(cls, string_value: Optional[str]) -> "BUILD_STATUSES":
return BUILD_STATUSES[string_value.upper()]



def get_connector_build_output_url(connector_technical_name: str, connector_version: str) -> str:
    """Build the URL of the build output JSON file for one connector version.

    The result has the form
    ``<CONNECTOR_BUILD_OUTPUT_URL>/<connector_technical_name>/version-<connector_version>.json``.
    Documentation of the larger build output system can be found here:
    https://internal-docs.airbyte.io/Generated-Reports/Build-Status-Reports

    Args:
        connector_technical_name: Path segment placed right after the base URL.
        connector_version: Version string embedded in the JSON file name.

    Returns:
        str: The assembled URL.
    """
    version_file_name = f"version-{connector_version}.json"
    return f"{CONNECTOR_BUILD_OUTPUT_URL}/{connector_technical_name}/{version_file_name}"

def fetch_latest_build_status_for_connector_version(connector_technical_name: str, connector_version: str) ->BUILD_STATUSES:

def fetch_latest_build_status_for_connector_version(connector_technical_name: str, connector_version: str) -> BUILD_STATUSES:
"""Fetch the latest build status for a given connector version."""
connector_build_output_url = get_connector_build_output_url(connector_technical_name, connector_version)
connector_build_output_response = requests.get(connector_build_output_url)
Expand All @@ -58,6 +55,7 @@ def fetch_latest_build_status_for_connector_version(connector_technical_name: st
else:
return BUILD_STATUSES.NOT_FOUND


def fetch_remote_catalog(catalog_url: str) -> pd.DataFrame:
"""Fetch a combined remote catalog and return a single DataFrame
with sources and destinations defined by the connector_type column.
Expand All @@ -77,22 +75,24 @@ def fetch_remote_catalog(catalog_url: str) -> pd.DataFrame:
destinations["connector_definition_id"] = destinations.destinationDefinitionId
return pd.concat([sources, destinations])


def fetch_adoption_metrics_per_connector_version() -> pd.DataFrame:
"""Retrieve adoption metrics for each connector version from our data warehouse.
Returns:
pd.DataFrame: A dataframe with adoption metrics per connector version.
"""
connector_adoption_sql = files("ci_connector_ops.qa_engine").joinpath("connector_adoption.sql").read_text()
bq_credentials = service_account.Credentials.from_service_account_info(json.loads(os.environ["QA_ENGINE_AIRBYTE_DATA_PROD_SA"]))
adoption_metrics = pd.read_gbq(connector_adoption_sql, project_id="airbyte-data-prod", credentials=bq_credentials)
return adoption_metrics[[
"connector_definition_id",
"connector_version",
"number_of_connections",
"number_of_users",
"succeeded_syncs_count",
"failed_syncs_count",
"total_syncs_count",
"sync_success_rate",
]]
adoption_metrics = pd.read_gbq(connector_adoption_sql, project_id="airbyte-data-prod")
return adoption_metrics[
[
"connector_definition_id",
"connector_version",
"number_of_connections",
"number_of_users",
"succeeded_syncs_count",
"failed_syncs_count",
"total_syncs_count",
"sync_success_rate",
]
]
21 changes: 14 additions & 7 deletions tools/ci_connector_ops/ci_connector_ops/qa_engine/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,19 @@

import logging

from . import cloud_availability_updater, enrichments, inputs, validations
from .constants import CLOUD_CATALOG_URL, OSS_CATALOG_URL
import click

from . import cloud_availability_updater, enrichments, inputs, outputs, validations
from .constants import CLOUD_CATALOG_URL, GCS_QA_REPORT_PATH, OSS_CATALOG_URL

logging.basicConfig(level=logging.INFO)

logger = logging.getLogger(__name__)


def main():
@click.command()
@click.option("--create-prs", is_flag=True)
def main(create_prs):
logger.info("Fetch the OSS connectors catalog.")
oss_catalog = inputs.fetch_remote_catalog(OSS_CATALOG_URL)
logger.info("Fetch the Cloud connectors catalog.")
Expand All @@ -23,7 +27,10 @@ def main():
enriched_catalog = enrichments.get_enriched_catalog(oss_catalog, cloud_catalog, adoption_metrics_per_connector_version)
logger.info("Start the QA report generation.")
qa_report = validations.get_qa_report(enriched_catalog, len(oss_catalog))
logger.info("Start the QA report generation.")
eligible_connectors = validations.get_connectors_eligible_for_cloud(qa_report)
logger.info("Start eligible connectors deployment to Cloud.")
cloud_availability_updater.deploy_eligible_connectors_to_cloud_repo(eligible_connectors)
logger.info("Persist QA report to GCS")
outputs.persist_qa_report(qa_report, GCS_QA_REPORT_PATH, public_fields_only=False)

if create_prs:
logger.info("Start eligible connectors deployment to Cloud.")
eligible_connectors = validations.get_connectors_eligible_for_cloud(qa_report)
cloud_availability_updater.deploy_eligible_connectors_to_cloud_repo(eligible_connectors)
13 changes: 9 additions & 4 deletions tools/ci_connector_ops/ci_connector_ops/qa_engine/outputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,18 @@
#


from datetime import datetime

import pandas as pd

from .models import ConnectorQAReport

def persist_qa_report(qa_report: pd.DataFrame, path: str, public_fields_only: bool =True):

def persist_qa_report(qa_report: pd.DataFrame, path: str, public_fields_only: bool = True) -> str:
report_generation_date = datetime.strftime(qa_report["report_generation_datetime"].max(), "%Y%m%d")
path = path + f"{report_generation_date}_qa_report.jsonl"
final_fields = [
field.name for field in ConnectorQAReport.__fields__.values()
if field.field_info.extra["is_public"] or not public_fields_only
field.name for field in ConnectorQAReport.__fields__.values() if field.field_info.extra["is_public"] or not public_fields_only
]
qa_report[final_fields].to_json(path, orient="records")
qa_report[final_fields].to_json(path, orient="records", lines=True)
return path
1 change: 1 addition & 0 deletions tools/ci_connector_ops/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from setuptools import find_packages, setup

MAIN_REQUIREMENTS = [
"click~=8.1.3",
"requests",
"PyYAML~=6.0",
"GitPython~=3.1.29",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright (c) 2022 Airbyte, Inc., all rights reserved.
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#


Expand Down Expand Up @@ -167,11 +167,11 @@ def test_create_pr(mocker, pr_already_created):
response = cloud_availability_updater.create_pr(connector, "my_awesome_branch")
expected_url = "https://api.github.com/repos/airbytehq/airbyte-platform-internal/pulls"
expected_body = f"""The Cloud Availability Updater decided that it's the right time to make {connector.connector_name} available on Cloud!
Technical name: {connector.connector_technical_name}
Version: {connector.connector_version}
Definition ID: {connector.connector_definition_id}
OSS sync success rate: {connector.sync_success_rate}
OSS number of connections: {connector.number_of_connections}
- Technical name: {connector.connector_technical_name}
- Version: {connector.connector_version}
- Definition ID: {connector.connector_definition_id}
- OSS sync success rate: {connector.sync_success_rate}
- OSS number of connections: {connector.number_of_connections}
"""
expected_data = {
"title": "🤖 Add source-foobar to cloud",
Expand Down
Loading

0 comments on commit 9e8035c

Please sign in to comment.