Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

connector-insights: use SPDX SBOM generated on publish #44493

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions airbyte-ci/connectors/connector_ops/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ poetry run pytest
```

## Changelog
- 0.8.0: Add a `sbom_url` property to `Connector`
- 0.7.0: Added required reviewers for manifest-only connector changes/additions.
- 0.6.1: Simplified gradle dependency discovery logic.
- 0.6.0: Added manifest-only build.
Expand Down
17 changes: 17 additions & 0 deletions airbyte-ci/connectors/connector_ops/connector_ops/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -645,6 +645,23 @@ def cloud_usage(self) -> Optional[str]:

return get(connector_entry, "generated.metrics.cloud.usage")

@property
def sbom_url(self) -> Optional[str]:
    """Look up this connector's SBOM URL in the OSS registry.

    Returns:
        The value stored under ``generated.sbomUrl`` in the connector's OSS registry entry,
        or None when no registry entry matches the connector's definition id.
    """
    connector_definition_id = self.metadata.get("definitionId")
    # The OSS registry is the source of truth for released connectors:
    # the cloud registry can be a subset of the OSS registry.
    registry = download_catalog(OSS_CATALOG_URL)
    registry_entry = find(
        registry[f"{self.connector_type}s"],
        {self.registry_primary_key_field: connector_definition_id},
    )
    return get(registry_entry, "generated.sbomUrl") if registry_entry else None

@property
def image_address(self) -> str:
return f'{self.metadata["dockerRepository"]}:{self.metadata["dockerImageTag"]}'
Expand Down
2 changes: 1 addition & 1 deletion airbyte-ci/connectors/connector_ops/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"

[tool.poetry]
name = "connector_ops"
version = "0.7.0"
version = "0.8.0"
description = "Packaged maintained by the connector operations team to perform CI for connectors"
authors = ["Airbyte <contact@airbyte.io>"]

Expand Down
3 changes: 3 additions & 0 deletions airbyte-ci/connectors/connectors_insights/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ This CLI is currently running nightly in GitHub Actions. The workflow can be fou

## Changelog

### 0.3.3
Use SBOM from the connector registry (SPDX format) instead of generating SBOM in the connector insights.

### 0.3.2
Bugfix: Ignore CI on master report if it's not accessible.

Expand Down
2 changes: 1 addition & 1 deletion airbyte-ci/connectors/connectors_insights/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "connectors-insights"
version = "0.3.2"
version = "0.3.3"
description = ""
authors = ["Airbyte <contact@airbyte.io>"]
readme = "README.md"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,11 @@
import re
from typing import TYPE_CHECKING

import requests
from connectors_insights.hacks import get_ci_on_master_report
from connectors_insights.models import ConnectorInsights
from connectors_insights.pylint import get_pylint_output
from connectors_insights.result_backends import FileToPersist, ResultBackend
from connectors_insights.sbom import get_json_sbom
from typing_extensions import Mapping

if TYPE_CHECKING:
from typing import Dict, List, Tuple
Expand Down Expand Up @@ -78,12 +77,17 @@ def get_sbom_inferred_insights(raw_sbom: str | None, connector: Connector) -> Di
if not raw_sbom:
return sbom_inferred_insights
sbom = json.loads(raw_sbom)
python_artifacts = {artifact["name"]: artifact for artifact in sbom["artifacts"] if artifact["type"] == "python"}
for artifact in sbom["artifacts"]:
dependency = {"type": artifact["type"], "version": artifact["version"], "package_name": artifact["name"]}
python_artifacts = {package["name"]: package for package in sbom["packages"] if package["SPDXID"].startswith("SPDXRef-Package-python-")}
sbom_inferred_insights["cdk_version"] = python_artifacts.get("airbyte-cdk", {}).get("versionInfo")

for package in sbom["packages"]:
package_type = package["SPDXID"].split("-")[2]
try:
dependency = {"type": package_type, "version": package["versionInfo"], "package_name": package["name"]}
except KeyError:
continue
if isinstance(sbom_inferred_insights["dependencies"], list) and dependency not in sbom_inferred_insights["dependencies"]:
sbom_inferred_insights["dependencies"].append(dependency)
sbom_inferred_insights["cdk_version"] = python_artifacts.get("airbyte-cdk", {}).get("version")
return sbom_inferred_insights


Expand Down Expand Up @@ -141,7 +145,7 @@ def should_skip_generation(
return True


async def fetch_sbom(dagger_client: dagger.Client, connector: Connector) -> str | None:
def fetch_sbom(connector: Connector) -> str | None:
"""Fetch the SBOM for the connector if it is released.
SBOMs are generated from published Docker images. If the connector is not released, it does not have a published Docker image.

Expand All @@ -152,8 +156,10 @@ async def fetch_sbom(dagger_client: dagger.Client, connector: Connector) -> str
Returns:
str | None: The SBOM in JSON format if the connector is released, None otherwise.
"""
if connector.is_released:
return await get_json_sbom(dagger_client, connector)
if connector.sbom_url:
r = requests.get(connector.sbom_url)
r.raise_for_status()
return r.text
return None


Expand Down Expand Up @@ -234,8 +240,7 @@ async def generate_insights_for_connector(
"""
logger = logging.getLogger(__name__)
insights_file = FileToPersist("insights.json")
sbom_file = FileToPersist("sbom.json")
files_to_persist = [insights_file, sbom_file]
files_to_persist = [insights_file]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: if there's just one file, do we even need this list in a variable?


async with semaphore:
if should_skip_generation(result_backends, connector, files_to_persist, rewrite):
Expand All @@ -246,10 +251,7 @@ async def generate_insights_for_connector(
result_backends = result_backends or []
try:
pylint_output = await get_pylint_output(dagger_client, connector)
raw_sbom = await fetch_sbom(dagger_client, connector)
if raw_sbom:
sbom_file.set_file_content(raw_sbom)

raw_sbom = fetch_sbom(connector)
insights = generate_insights(connector, raw_sbom, pylint_output)
insights_file.set_file_content(insights.json())
persist_files(connector, files_to_persist, result_backends, rewrite, logger)
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
from pathlib import Path
from typing import TYPE_CHECKING

import requests
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wait a second, how did that work before? Is requests used there or not?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, I don't think requests is used?

Comment on lines 8 to +9
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
import requests


if TYPE_CHECKING:
from typing import Callable, List, Set, Tuple

Expand Down
Loading