Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🐛 Source Google Analytics (Universal Analytics): add Custom Reports schema validation on Check Connection #18670

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -443,7 +443,7 @@
- name: Google Analytics (Universal Analytics)
sourceDefinitionId: eff3616a-f9c3-11eb-9a03-0242ac130003
dockerRepository: airbyte/source-google-analytics-v4
dockerImageTag: 0.1.30
dockerImageTag: 0.1.31
documentationUrl: https://docs.airbyte.com/integrations/sources/google-analytics-universal-analytics
icon: google-analytics.svg
sourceType: api
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4188,7 +4188,7 @@
oauthFlowOutputParameters:
- - "access_token"
- - "refresh_token"
- dockerImage: "airbyte/source-google-analytics-v4:0.1.30"
- dockerImage: "airbyte/source-google-analytics-v4:0.1.31"
spec:
documentationUrl: "https://docs.airbyte.com/integrations/sources/google-analytics-universal-analytics"
connectionSpecification:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,5 @@ COPY main.py ./
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py"
ENTRYPOINT ["python", "/airbyte/integration_code/main.py"]

LABEL io.airbyte.version=0.1.30
LABEL io.airbyte.name=airbyte/source-google-analytics-v4
LABEL io.airbyte.version=0.1.31
LABEL io.airbyte.name=airbyte/source-google-analytics-v4
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
#
# Copyright (c) 2022 Airbyte, Inc., all rights reserved.
#


from dataclasses import dataclass, field
from typing import Dict, List, Optional, Union

from pydantic import BaseModel, Field, ValidationError


class Model(BaseModel):
    """Schema of a single Custom Report entry; keys other than the declared ones are rejected."""

    name: str
    dimensions: List[str]
    metrics: List[str]
    # the only field declared as Optional
    filter: Optional[str]

    class Config:
        # any key that is not declared above is reported as a validation error
        extra = "forbid"


class Explainer:
    """
    Turn the raw entries of `pydantic.ValidationError.errors()` into a short readable summary.

    `errors_mapping` is a template keyed by the pydantic error type:
    {
        key: str = <pydantic error type>,
        value: tuple(str, list) = (<explainable message>, <list used as placeholder>)
    }

    The class-level template is never mutated: every instance works on its own copy,
    so the field names collected for one report cannot leak into the explanation
    of another one.
    """

    errors_mapping = {
        "value_error.missing": ("fields required", []),
        "value_error.extra": ("fields not permitted", []),
        "type_error": ("type errors", []),
    }

    def __init__(self) -> None:
        self._reset()

    def _reset(self) -> None:
        """Shadow the class-level template with a per-instance copy holding fresh, empty placeholders."""
        self.errors_mapping = {error_type: (msg, []) for error_type, (msg, _) in type(self).errors_mapping.items()}

    def parse(self, errors: List[Dict]) -> None:
        """
        Sort every error into the placeholder of its category.

        :param errors: list of error dicts, each expected to carry the `loc` and `type` keys.

        Error types that are neither type errors nor present in `errors_mapping` are ignored.
        """
        for error in errors:
            location = error.get("loc") or ("<unknown>",)
            field_name, error_type = location[0], error.get("type") or ""

            # type errors: `type_error`, `type_error.str`, `type_error.none.not_allowed`, ...
            if error_type == "type_error" or error_type.startswith("type_error."):
                # partition() never fails, whatever the number of dots in the error type
                _, _, expected = error_type.partition(".")
                detail = f"{expected} is required" if expected else "invalid type"
                self.errors_mapping["type_error"][1].append((field_name, detail))
            # general errors
            elif error_type in self.errors_mapping:
                self.errors_mapping[error_type][1].append(field_name)

    def explain(self, errors: List[Dict]) -> Optional[str]:
        """
        General Errors are explained first.
        Such as:
            - missing required field
            - presence of non-permitted fields

        Type Errors are explained last.
        If model attribute has invalid type provided, like list, but str was required and etc:
            - str is required,
            - ...

        :param errors: list of error dicts, as returned by `ValidationError.errors()`.
        :return: the message of the first non-empty category followed by the collected fields,
                 or None when no error belongs to a known category.
        """
        # start from empty placeholders, so repeated calls on one instance do not accumulate
        self._reset()
        self.parse(errors)

        for msg, collected in self.errors_mapping.values():
            if collected:
                return f"{msg} {collected}"
        return None


@dataclass
class CustomReportsValidator:
    """
    Validate the structure of the `custom_reports` input against `Model`.

    :param custom_reports: a single report (dict) or a list of reports; defaults to an empty list.
    """

    # must be `dataclasses.field`: inside a stdlib dataclass `pydantic.Field(...)` is not a field
    # specifier, the returned FieldInfo object itself would become the default value.
    custom_reports: Union[List[Dict], Dict] = field(default_factory=list)

    def __post_init__(self):
        # a single report is wrapped into a list, so `validate` always iterates over a list
        self.reports: list = self.custom_reports if isinstance(self.custom_reports, list) else [self.custom_reports]
        self.model: Model = Model
        self.explainer: Explainer = Explainer()

    def validate(self):
        """
        Parse every report with the model, stop on the first invalid one.

        :raises AirbyteTracedException: with `FailureType.config_error`, naming the invalid report
                and the explained validation errors.
        """

        # local import of airbyte_cdk dependencies
        from airbyte_cdk.models import FailureType
        from airbyte_cdk.utils.traced_exception import AirbyteTracedException

        try:
            for report in self.reports:
                self.model.parse_obj(report)
        except ValidationError as e:
            # `from None` drops the pydantic traceback, the explained errors are in the message
            raise AirbyteTracedException(
                message=None,
                internal_message=f"Custom Reports has invalid structure in report: {report}, errors: {self.explainer.explain(e.errors())}",
                failure_type=FailureType.config_error,
            ) from None
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
from airbyte_cdk.sources.streams.http import HttpStream
from airbyte_cdk.sources.streams.http.auth import Oauth2Authenticator

from .custom_reports_validator import CustomReportsValidator

DATA_IS_NOT_GOLDEN_MSG = "Google Analytics data is not golden. Future requests may return different data."

RESULT_IS_SAMPLED_MSG = (
Expand Down Expand Up @@ -572,12 +574,12 @@ def check_connection(self, logger: logging.Logger, config: MutableMapping) -> Tu
config["metrics"] = ["ga:hits"]
config["dimensions"] = ["ga:date"]

# load and verify the custom_reports
try:
# test the eligibility of custom_reports input
custom_reports = config.get("custom_reports")
if custom_reports:
json.loads(custom_reports)

CustomReportsValidator(json.loads(custom_reports)).validate()
# Read records to check the reading permissions
read_check = list(TestStreamConnection(config).read_records(sync_mode=None))
if read_check:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
#
# Copyright (c) 2022 Airbyte, Inc., all rights reserved.
#


import copy
import json
from pathlib import Path

import pendulum
import pytest
from airbyte_cdk.models import ConfiguredAirbyteCatalog
from airbyte_cdk.sources.streams.http.auth import NoAuth


def read_file(file_name):
    """
    Return the text content of `file_name`, resolved relative to the directory of this module.

    :param file_name: file name or relative path inside this module's directory.
    :return: the whole file content as a string.
    """
    parent_location = Path(__file__).absolute().parent
    # read_text() opens and closes the file itself, no handle is left open
    return (parent_location / file_name).read_text(encoding="utf-8")


@pytest.fixture
def mock_metrics_dimensions_type_list_link(requests_mock):
    """Mock GET on the metadata columns endpoint, answering with `metrics_dimensions_type_list.json`."""
    columns_payload = json.loads(read_file("metrics_dimensions_type_list.json"))
    requests_mock.get("https://www.googleapis.com/analytics/v3/metadata/ga/columns", json=columns_payload)


@pytest.fixture
def mock_auth_call(requests_mock):
    """Mock POST on the OAuth2 token endpoint and yield the registered mock."""
    token_payload = {"access_token": "", "expires_in": 0}
    registered = requests_mock.post("https://oauth2.googleapis.com/token", json=token_payload)
    yield registered


@pytest.fixture
def mock_auth_check_connection(requests_mock):
    """Mock POST on `reports:batchGet` with a minimal data payload and yield the registered mock."""
    report_payload = {"data": {"test": "value"}}
    registered = requests_mock.post("https://analyticsreporting.googleapis.com/v4/reports:batchGet", json=report_payload)
    yield registered


@pytest.fixture
def mock_unknown_metrics_or_dimensions_error(requests_mock):
    """Mock POST on `reports:batchGet` answering HTTP 400 with an "Unknown metrics or dimensions" error."""
    error_payload = {"error": {"message": "Unknown metrics or dimensions"}}
    registered = requests_mock.post(
        "https://analyticsreporting.googleapis.com/v4/reports:batchGet",
        json=error_payload,
        status_code=400,
    )
    yield registered


@pytest.fixture
def mock_daily_request_limit_error(requests_mock):
    """Mock POST on `reports:batchGet` answering HTTP 429 with a daily request limit quota error."""
    quota_payload = {"error": {"code": 429, "message": "Quota Error: profileId 207066566 has exceeded the daily request limit."}}
    registered = requests_mock.post(
        "https://analyticsreporting.googleapis.com/v4/reports:batchGet",
        json=quota_payload,
        status_code=429,
    )
    yield registered


@pytest.fixture
def mock_api_returns_no_records(requests_mock):
    """Mock POST on `reports:batchGet` answering with the content of `empty_response.json`."""
    empty_payload = json.loads(read_file("empty_response.json"))
    registered = requests_mock.post("https://analyticsreporting.googleapis.com/v4/reports:batchGet", json=empty_payload)
    yield registered


@pytest.fixture
def mock_api_returns_valid_records(requests_mock):
    """Mock POST on `reports:batchGet` answering with `response_golden_data.json`."""
    golden_payload = json.loads(read_file("response_golden_data.json"))
    # guard on the fixture file itself: every report must be flagged as golden
    for entry in golden_payload["reports"]:
        assert entry["data"]["isDataGolden"] is True
    registered = requests_mock.post("https://analyticsreporting.googleapis.com/v4/reports:batchGet", json=golden_payload)
    yield registered


@pytest.fixture
def mock_api_returns_sampled_results(requests_mock):
    """Mock POST on `reports:batchGet` answering with the content of `response_with_sampling.json`."""
    sampled_payload = json.loads(read_file("response_with_sampling.json"))
    registered = requests_mock.post("https://analyticsreporting.googleapis.com/v4/reports:batchGet", json=sampled_payload)
    yield registered


@pytest.fixture
def mock_api_returns_is_data_golden_false(requests_mock):
    """Mock POST on `reports:batchGet` answering with `response_non_golden_data.json`."""
    non_golden_payload = json.loads(read_file("response_non_golden_data.json"))
    # guard on the fixture file itself: no report may carry the `isDataGolden` key
    for entry in non_golden_payload["reports"]:
        assert "isDataGolden" not in entry["data"]
    registered = requests_mock.post("https://analyticsreporting.googleapis.com/v4/reports:batchGet", json=non_golden_payload)
    yield registered


@pytest.fixture
def configured_catalog():
    """Build a ConfiguredAirbyteCatalog from `configured_catalog.json`."""
    raw_catalog = json.loads(read_file("./configured_catalog.json"))
    return ConfiguredAirbyteCatalog.parse_obj(raw_catalog)


@pytest.fixture()
def test_config():
    """Return a deep copy of a base source config using `Client` credentials and a start date two days back."""
    client_credentials = {
        "auth_type": "Client",
        "client_id": "client_id_val",
        "client_secret": "client_secret_val",
        "refresh_token": "refresh_token_val",
    }
    base_config = {
        "view_id": "1234567",
        "window_in_days": 1,
        "authenticator": NoAuth(),
        "metrics": [],
        "start_date": pendulum.now().subtract(days=2).date().strftime("%Y-%m-%d"),
        "dimensions": [],
        "credentials": client_credentials,
    }
    return copy.deepcopy(base_config)


@pytest.fixture()
def test_config_auth_service(test_config):
    """Swap the credentials of `test_config` for `Service` credentials and return a deep copy of it."""
    service_credentials = {
        "auth_type": "Service",
        "credentials_json": '{"client_email": "", "private_key": "", "private_key_id": ""}',
    }
    # the incoming fixture value is updated in place, as before, then copied
    test_config["credentials"] = service_credentials
    return copy.deepcopy(test_config)
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#
# Copyright (c) 2022 Airbyte, Inc., all rights reserved.
#

import pytest
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
from source_google_analytics_v4.custom_reports_validator import CustomReportsValidator


@pytest.mark.parametrize(
    "custom_reports, expected",
    (
        ([{"name": [], "dimensions": ["test"], "metrics": ["test"]}], "errors: type errors"),
        ([{"name": "test", "dimensions": ["test"], "metrics": ["test"], "added_field": "test"}], "errors: fields not permitted"),
        ([{"missing_name": "test", "dimensions": ["test"], "metrics": ["test"]}], "errors: fields required"),
    ),
    ids=["type_error", "not_permitted", "missing"],
)
def test_custom_reports_validator(custom_reports, expected):
    """Every invalid report must raise AirbyteTracedException carrying the expected explanation."""
    # pytest.raises fails the test when nothing is raised; a bare try/except would pass silently
    with pytest.raises(AirbyteTracedException) as exc_info:
        CustomReportsValidator(custom_reports).validate()
    assert expected in str(exc_info.value)
Loading