From b37efe9cda2aa3f347a4ff581737a10948308036 Mon Sep 17 00:00:00 2001 From: Artem Inzhyyants <36314070+artem1205@users.noreply.github.com> Date: Mon, 29 Jan 2024 17:31:30 +0100 Subject: [PATCH] =?UTF-8?q?=F0=9F=8E=89=20New=20Source:=20Google=20Analyti?= =?UTF-8?q?cs=20v4=20Service=20Account=20Only=20(#34323)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../.dockerignore | 7 + .../README.md | 149 +++++++++ .../acceptance-test-config.yml | 35 +++ .../icon.svg | 1 + .../integration_tests/__init__.py | 23 ++ .../integration_tests/abnormal_state.json | 86 ++++++ .../integration_tests/acceptance.py | 13 + .../integration_tests/catalog.json | 123 ++++++++ .../integration_tests/configured_catalog.json | 125 ++++++++ .../configured_catalog_segment_filters.json | 15 + .../integration_tests/expected_records.jsonl | 22 ++ .../integration_tests/invalid_config.json | 10 + .../integration_tests/sample_config.json | 6 + .../integration_tests/sample_state.json | 86 ++++++ .../main.py | 13 + .../metadata.yaml | 32 ++ .../requirements.txt | 1 + .../setup.py | 35 +++ .../__init__.py | 28 ++ .../source.py | 12 + .../spec.json | 79 +++++ .../source-google-analytics-v4/metadata.yaml | 2 +- .../source-google-analytics-v4/setup.py | 2 +- ...oogle-analytics-v4-service-account-only.md | 286 ++++++++++++++++++ .../sources/google-analytics-v4.md | 1 + 25 files changed, 1190 insertions(+), 2 deletions(-) create mode 100644 airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/.dockerignore create mode 100644 airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/README.md create mode 100644 airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/acceptance-test-config.yml create mode 100644 airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/icon.svg create mode 100644 
airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/integration_tests/__init__.py create mode 100644 airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/integration_tests/abnormal_state.json create mode 100644 airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/integration_tests/acceptance.py create mode 100644 airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/integration_tests/catalog.json create mode 100644 airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/integration_tests/configured_catalog.json create mode 100644 airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/integration_tests/configured_catalog_segment_filters.json create mode 100644 airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/integration_tests/expected_records.jsonl create mode 100644 airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/integration_tests/invalid_config.json create mode 100644 airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/integration_tests/sample_config.json create mode 100644 airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/integration_tests/sample_state.json create mode 100644 airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/main.py create mode 100644 airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/metadata.yaml create mode 100644 airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/requirements.txt create mode 100644 airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/setup.py create mode 100644 airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/source_google_analytics_v4_service_account_only/__init__.py create mode 100644 
airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/source_google_analytics_v4_service_account_only/source.py create mode 100644 airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/source_google_analytics_v4_service_account_only/spec.json create mode 100644 docs/integrations/sources/google-analytics-v4-service-account-only.md diff --git a/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/.dockerignore b/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/.dockerignore new file mode 100644 index 000000000000..e3ebf60f6c58 --- /dev/null +++ b/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/.dockerignore @@ -0,0 +1,7 @@ +* +!Dockerfile +!Dockerfile.test +!main.py +!source_google_analytics_v4_service_account_only +!setup.py +!secrets diff --git a/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/README.md b/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/README.md new file mode 100644 index 000000000000..2c931f8d643f --- /dev/null +++ b/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/README.md @@ -0,0 +1,149 @@ +# Google Analytics V4 (Service Account Only) Source + +This is the repository for the Google Analytics V4 source connector, written in Python. +For information about how to use this connector within Airbyte, see [the documentation](https://docs.airbyte.io/integrations/sources/google-analytics-v4). + +## Local development + +### Prerequisites +**To iterate on this connector, make sure to complete this prerequisites section.** + +#### Minimum Python version required `= 3.7.0` + +#### Build & Activate Virtual Environment and install dependencies +From this connector directory, create a virtual environment: +``` +python -m venv .venv +``` + +This will generate a virtualenv for this module in `.venv/`. 
Make sure this venv is active in your +development environment of choice. To activate it from the terminal, run: +``` +source .venv/bin/activate +pip install -r requirements.txt +``` +If you are in an IDE, follow your IDE's instructions to activate the virtualenv. + +Note that while we are installing dependencies from `requirements.txt`, you should only edit `setup.py` for your dependencies. `requirements.txt` is +used for editable installs (`pip install -e`) to pull in Python dependencies from the monorepo and will call `setup.py`. +If this is mumbo jumbo to you, don't worry about it, just put your deps in `setup.py` but install using `pip install -r requirements.txt` and everything +should work as you expect. + +#### Create credentials +**If you are a community contributor**, follow the instructions in the [documentation](https://docs.airbyte.io/integrations/sources/google-analytics-v4) +to generate the necessary credentials. Then create a file `secrets/config.json` conforming to the `source_google_analytics_v4_service_account_only/spec.json` file. +Note that any directory named `secrets` is gitignored across the entire Airbyte repo, so there is no danger of accidentally checking in sensitive information. +See `integration_tests/sample_config.json` for a sample config file. + +**If you are an Airbyte core member**, copy the credentials in Lastpass under the secret name `source google-analytics-v4-service-account-only test creds` +and place them into `secrets/config.json`. + +### Locally running the connector +``` +python main.py spec +python main.py check --config secrets/config.json +python main.py discover --config secrets/config.json +python main.py read --config secrets/config.json --catalog integration_tests/configured_catalog.json +``` + +### Locally running the connector docker image + + + +#### Use `airbyte-ci` to build your connector +The Airbyte way of building this connector is to use our `airbyte-ci` tool. 
+You can follow install instructions [here](https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/pipelines/README.md#L1). +Then running the following command will build your connector: + +```bash +airbyte-ci connectors --name=source-google-analytics-v4-service-account-only build +``` +Once the command is done, you will find your connector image in your local docker registry: `airbyte/source-google-analytics-v4-service-account-only:dev`. + +##### Customizing our build process +When contributing to our connector you might need to customize the build process to add a system dependency or set an env var. +You can customize our build process by adding a `build_customization.py` module to your connector. +This module should contain a `pre_connector_install` and `post_connector_install` async function that will mutate the base image and the connector container respectively. +It will be imported at runtime by our build process and the functions will be called if they exist. + +Here is an example of a `build_customization.py` module: +```python +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + # Feel free to check the dagger documentation for more information on the Container object and its methods. + # https://dagger-io.readthedocs.io/en/sdk-python-v0.6.4/ + from dagger import Container + + +async def pre_connector_install(base_image_container: Container) -> Container: + return await base_image_container.with_env_variable("MY_PRE_BUILD_ENV_VAR", "my_pre_build_env_var_value") + +async def post_connector_install(connector_container: Container) -> Container: + return await connector_container.with_env_variable("MY_POST_BUILD_ENV_VAR", "my_post_build_env_var_value") +``` + +#### Build your own connector image +This connector is built using our dynamic build process in `airbyte-ci`. +The base image used to build it is defined within the metadata.yaml file under the `connectorBuildOptions`. 
+The build logic is defined using [Dagger](https://dagger.io/) [here](https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/pipelines/pipelines/builds/python_connectors.py). +It does not rely on a Dockerfile. + +If you would like to patch our connector and build your own, a simple approach would be to: + +1. Create your own Dockerfile based on the latest version of the connector image. +```Dockerfile +FROM airbyte/source-google-analytics-v4-service-account-only:latest + +COPY . ./airbyte/integration_code +RUN pip install ./airbyte/integration_code + +# The entrypoint and default env vars are already set in the base image +# ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" +# ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] +``` +Please use this as an example. This is not optimized. + +2. Build your image: +```bash +docker build -t airbyte/source-google-analytics-v4-service-account-only:dev . +# Running the spec command against your patched connector +docker run airbyte/source-google-analytics-v4-service-account-only:dev spec +``` +#### Run +Then run any of the connector commands as follows: +``` +docker run --rm airbyte/source-google-analytics-v4-service-account-only:dev spec +docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-google-analytics-v4-service-account-only:dev check --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-google-analytics-v4-service-account-only:dev discover --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/source-google-analytics-v4-service-account-only:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json +``` + +## Testing +You can run our full test suite locally using [`airbyte-ci`](https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/pipelines/README.md): +```bash +airbyte-ci connectors --name=source-google-analytics-v4-service-account-only 
test +``` + +### Customizing acceptance Tests +Customize `acceptance-test-config.yml` file to configure tests. See [Connector Acceptance Tests](https://docs.airbyte.com/connector-development/testing-connectors/connector-acceptance-tests-reference) for more information. +If your connector needs to create or destroy resources for use during acceptance tests, create fixtures for it and place them inside integration_tests/acceptance.py. + +## Dependency Management +All of your dependencies should go in `setup.py`, NOT `requirements.txt`. The requirements file is only used to connect internal Airbyte dependencies in the monorepo for local development. +We split dependencies between two groups, dependencies that are: +* required for your connector to work need to go to `MAIN_REQUIREMENTS` list. +* required for the testing need to go to `TEST_REQUIREMENTS` list + +### Publishing a new version of the connector +You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what? +1. Make sure your changes are passing our test suite: `airbyte-ci connectors --name=source-google-analytics-v4-service-account-only test` +2. Bump the connector version in `metadata.yaml`: increment the `dockerImageTag` value. Please follow [semantic versioning for connectors](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#semantic-versioning-for-connectors). +3. Make sure the `metadata.yaml` content is up to date. +4. Make sure the connector documentation and its changelog are up to date (`docs/integrations/sources/google-analytics-v4-service-account-only.md`). +5. Create a Pull Request: use [our PR naming conventions](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#pull-request-title-convention). +6. Pat yourself on the back for being an awesome contributor. +7. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master. 
+ diff --git a/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/acceptance-test-config.yml b/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/acceptance-test-config.yml new file mode 100644 index 000000000000..6a4357b097a9 --- /dev/null +++ b/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/acceptance-test-config.yml @@ -0,0 +1,35 @@ +connector_image: airbyte/source-google-analytics-v4-service-account-only:dev +test_strictness_level: high +acceptance_tests: + spec: + tests: + - spec_path: source_google_analytics_v4_service_account_only/spec.json + discovery: + tests: + - config_path: secrets/config.json + connection: + tests: + - config_path: secrets/config.json + status: succeed + - config_path: integration_tests/invalid_config.json + status: exception + basic_read: + tests: + - config_path: secrets/config.json + empty_streams: + - name: users_per_city + bypass_reason: no records in the stream + expect_records: + path: integration_tests/expected_records.jsonl + timeout_seconds: 1800 + full_refresh: + tests: + - config_path: secrets/config.json + configured_catalog_path: integration_tests/configured_catalog.json + incremental: + tests: + - config_path: secrets/config.json + configured_catalog_path: integration_tests/configured_catalog.json + timeout_seconds: 2400 + future_state: + future_state_path: integration_tests/abnormal_state.json diff --git a/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/icon.svg b/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/icon.svg new file mode 100644 index 000000000000..94dfa7142701 --- /dev/null +++ b/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/icon.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/integration_tests/__init__.py 
b/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/integration_tests/__init__.py new file mode 100644 index 000000000000..9db886e0930f --- /dev/null +++ b/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/integration_tests/__init__.py @@ -0,0 +1,23 @@ +# +# MIT License +# +# Copyright (c) 2020 Airbyte +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+# diff --git a/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/integration_tests/abnormal_state.json b/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/integration_tests/abnormal_state.json new file mode 100644 index 000000000000..6886688610f5 --- /dev/null +++ b/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/integration_tests/abnormal_state.json @@ -0,0 +1,86 @@ +[ + { + "type": "STREAM", + "stream": { + "stream_state": { "ga_date": "2050-05-01" }, + "stream_descriptor": { "name": "website_overview" } + } + }, + { + "type": "STREAM", + "stream": { + "stream_state": { "ga_date": "2050-05-01" }, + "stream_descriptor": { "name": "traffic_sources" } + } + }, + { + "type": "STREAM", + "stream": { + "stream_state": { "ga_date": "2050-05-01" }, + "stream_descriptor": { "name": "pages" } + } + }, + { + "type": "STREAM", + "stream": { + "stream_state": { "ga_date": "2050-05-01" }, + "stream_descriptor": { "name": "locations" } + } + }, + { + "type": "STREAM", + "stream": { + "stream_state": { "ga_date": "2050-05-01" }, + "stream_descriptor": { "name": "monthly_active_users" } + } + }, + { + "type": "STREAM", + "stream": { + "stream_state": { "ga_date": "2050-05-01" }, + "stream_descriptor": { "name": "four_weekly_active_users" } + } + }, + { + "type": "STREAM", + "stream": { + "stream_state": { "ga_date": "2050-05-01" }, + "stream_descriptor": { "name": "two_weekly_active_users" } + } + }, + { + "type": "STREAM", + "stream": { + "stream_state": { "ga_date": "2050-05-01" }, + "stream_descriptor": { "name": "weekly_active_users" } + } + }, + { + "type": "STREAM", + "stream": { + "stream_state": { "ga_date": "2050-05-01" }, + "stream_descriptor": { "name": "daily_active_users" } + } + }, + { + "type": "STREAM", + "stream": { + "stream_state": { "ga_date": "2050-05-01" }, + "stream_descriptor": { "name": "devices" } + } + }, + { + "type": "STREAM", + "stream": { + "stream_state": { 
"ga_date": "2050-05-01" }, + "stream_descriptor": { "name": "users_per_day" } + } + }, + { + "type": "STREAM", + "stream": { + "stream_state": { "ga_date": "2050-05-01" }, + "stream_descriptor": { "name": "new_users_per_day" } + } + } +] diff --git a/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/integration_tests/acceptance.py b/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/integration_tests/acceptance.py new file mode 100644 index 000000000000..d49b55882333 --- /dev/null +++ b/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/integration_tests/acceptance.py @@ -0,0 +1,13 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + + +import pytest + +pytest_plugins = ("connector_acceptance_test.plugin",) + + +@pytest.fixture(scope="session", autouse=True) +def connector_setup(): + yield diff --git a/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/integration_tests/catalog.json b/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/integration_tests/catalog.json new file mode 100644 index 000000000000..f5cbf5205f5f --- /dev/null +++ b/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/integration_tests/catalog.json @@ -0,0 +1,123 @@ +{ + "streams": [ + { + "stream": { + "name": "website_overview", + "json_schema": { + "type": ["null", "object"], + "additionalProperties": true, + "properties": { + "ga_date": { + "type": ["string"] + }, + "ga_users": { + "type": ["null", "integer"] + }, + "ga_newUsers": { + "type": ["null", "integer"] + }, + "ga_sessions": { + "type": ["null", "integer"] + }, + "ga_sessionsPerUser": { + "type": ["null", "number"] + }, + "ga_avgSessionDuration": { + "type": ["null", "number"] + }, + "ga_pageviews": { + "type": ["null", "integer"] + }, + "ga_pageviewsPerSession": { + "type": ["null", "number"] + }, + "ga_avgTimeOnPage": { + "type": ["null", 
"number"] + }, + "ga_bounceRate": { + "type": ["null", "number"] + }, + "ga_exitRate": { + "type": ["null", "number"] + }, + "report_start_date": { + "type": ["string"] + }, + "report_end_date": { + "type": ["string"] + } + } + }, + "supported_sync_modes": ["incremental"], + "source_defined_cursor": true + }, + "sync_mode": "incremental", + "cursor_field": ["report_start_date"], + "destination_sync_mode": "append" + }, + { + "stream": { + "name": "traffic_sources", + "json_schema": { + "type": ["null", "object"], + "additionalProperties": true, + "properties": { + "ga_date": { + "type": ["string"] + }, + "ga_source": { + "type": ["string"] + }, + "ga_medium": { + "type": ["string"] + }, + "ga_socialNetwork": { + "type": ["string"] + }, + "ga_users": { + "type": ["null", "integer"] + }, + "ga_newUsers": { + "type": ["null", "integer"] + }, + "ga_sessions": { + "type": ["null", "integer"] + }, + "ga_sessionsPerUser": { + "type": ["null", "number"] + }, + "ga_avgSessionDuration": { + "type": ["null", "number"] + }, + "ga_pageviews": { + "type": ["null", "integer"] + }, + "ga_pageviewsPerSession": { + "type": ["null", "number"] + }, + "ga_avgTimeOnPage": { + "type": ["null", "number"] + }, + "ga_bounceRate": { + "type": ["null", "number"] + }, + "ga_exitRate": { + "type": ["null", "number"] + }, + "report_start_date": { + "type": ["string"] + }, + "report_end_date": { + "type": ["string"] + } + } + }, + "supported_sync_modes": ["incremental"], + "source_defined_cursor": true + }, + "sync_mode": "incremental", + "cursor_field": ["report_start_date"], + "destination_sync_mode": "append" + } + ] +} diff --git a/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/integration_tests/configured_catalog.json b/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/integration_tests/configured_catalog.json new file mode 100644 index 000000000000..71b19aca3eaa --- /dev/null +++ 
b/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/integration_tests/configured_catalog.json @@ -0,0 +1,125 @@ +{ + "streams": [ + { + "stream": { + "name": "website_overview", + "json_schema": {}, + "supported_sync_modes": ["incremental"], + "source_defined_cursor": true + }, + "sync_mode": "incremental", + "cursor_field": ["ga_date"], + "destination_sync_mode": "append" + }, + { + "stream": { + "name": "traffic_sources", + "json_schema": {}, + "supported_sync_modes": ["incremental"], + "source_defined_cursor": true + }, + "sync_mode": "incremental", + "cursor_field": ["ga_date"], + "destination_sync_mode": "append" + }, + { + "stream": { + "name": "pages", + "json_schema": {}, + "supported_sync_modes": ["incremental"], + "source_defined_cursor": true + }, + "sync_mode": "incremental", + "cursor_field": ["ga_date"], + "destination_sync_mode": "append" + }, + { + "stream": { + "name": "locations", + "json_schema": {}, + "supported_sync_modes": ["incremental"], + "source_defined_cursor": true + }, + "sync_mode": "incremental", + "cursor_field": ["ga_date"], + "destination_sync_mode": "append" + }, + { + "stream": { + "name": "monthly_active_users", + "json_schema": {}, + "supported_sync_modes": ["incremental"], + "source_defined_cursor": true + }, + "sync_mode": "incremental", + "cursor_field": ["ga_date"], + "destination_sync_mode": "append" + }, + { + "stream": { + "name": "four_weekly_active_users", + "json_schema": {}, + "supported_sync_modes": ["incremental"], + "source_defined_cursor": true + }, + "sync_mode": "incremental", + "cursor_field": ["ga_date"], + "destination_sync_mode": "append" + }, + { + "stream": { + "name": "two_weekly_active_users", + "json_schema": {}, + "supported_sync_modes": ["incremental"], + "source_defined_cursor": true + }, + "sync_mode": "incremental", + "cursor_field": ["ga_date"], + "destination_sync_mode": "append" + }, + { + "stream": { + "name": "weekly_active_users", + "json_schema": {}, + 
"supported_sync_modes": ["incremental"], + "source_defined_cursor": true + }, + "sync_mode": "incremental", + "cursor_field": ["ga_date"], + "destination_sync_mode": "append" + }, + { + "stream": { + "name": "daily_active_users", + "json_schema": {}, + "supported_sync_modes": ["incremental"], + "source_defined_cursor": true + }, + "sync_mode": "incremental", + "cursor_field": ["ga_date"], + "destination_sync_mode": "append" + }, + { + "stream": { + "name": "devices", + "json_schema": {}, + "supported_sync_modes": ["incremental"], + "source_defined_cursor": true + }, + "sync_mode": "incremental", + "cursor_field": ["ga_date"], + "destination_sync_mode": "append" + }, + { + "stream": { + "name": "new_users_per_day", + "json_schema": {}, + "supported_sync_modes": ["incremental"], + "source_defined_cursor": true + }, + "sync_mode": "incremental", + "cursor_field": ["ga_date"], + "destination_sync_mode": "append" + } + ] +} diff --git a/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/integration_tests/configured_catalog_segment_filters.json b/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/integration_tests/configured_catalog_segment_filters.json new file mode 100644 index 000000000000..2c34edf6530d --- /dev/null +++ b/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/integration_tests/configured_catalog_segment_filters.json @@ -0,0 +1,15 @@ +{ + "streams": [ + { + "stream": { + "name": "new_users_per_day", + "json_schema": {}, + "supported_sync_modes": ["incremental"], + "source_defined_cursor": true + }, + "sync_mode": "incremental", + "cursor_field": ["ga_date"], + "destination_sync_mode": "append" + } + ] +} diff --git a/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/integration_tests/expected_records.jsonl b/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/integration_tests/expected_records.jsonl new file mode 
100644 index 000000000000..a3ad03e595d9 --- /dev/null +++ b/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/integration_tests/expected_records.jsonl @@ -0,0 +1,22 @@ +{"stream": "website_overview", "data": {"ga_date": "2023-05-23", "ga_users": 1, "ga_newUsers": 1, "ga_sessions": 1, "ga_sessionsPerUser": 1.0, "ga_avgSessionDuration": 0.0, "ga_pageviews": 1, "ga_pageviewsPerSession": 1.0, "ga_avgTimeOnPage": 0.0, "ga_bounceRate": 100.0, "ga_exitRate": 100.0, "view_id": "211669975", "isDataGolden": true}, "emitted_at": 1685023935006} +{"stream": "website_overview", "data": {"ga_date": "2023-05-24", "ga_users": 4, "ga_newUsers": 3, "ga_sessions": 4, "ga_sessionsPerUser": 1.0, "ga_avgSessionDuration": 0.0, "ga_pageviews": 4, "ga_pageviewsPerSession": 1.0, "ga_avgTimeOnPage": 0.0, "ga_bounceRate": 100.0, "ga_exitRate": 100.0, "view_id": "211669975", "isDataGolden": true}, "emitted_at": 1685023935007} +{"stream": "traffic_sources", "data": {"ga_date": "2023-05-24", "ga_source": "(direct)", "ga_medium": "(none)", "ga_socialNetwork": "(not set)", "ga_users": 3, "ga_newUsers": 3, "ga_sessions": 3, "ga_sessionsPerUser": 1.0, "ga_avgSessionDuration": 0.0, "ga_pageviews": 3, "ga_pageviewsPerSession": 1.0, "ga_avgTimeOnPage": 0.0, "ga_bounceRate": 100.0, "ga_exitRate": 100.0, "view_id": "211669975", "isDataGolden": true}, "emitted_at": 1685023943712} +{"stream": "traffic_sources", "data": {"ga_date": "2023-05-24", "ga_source": "api.surveymonkey.com", "ga_medium": "referral", "ga_socialNetwork": "(not set)", "ga_users": 1, "ga_newUsers": 0, "ga_sessions": 1, "ga_sessionsPerUser": 1.0, "ga_avgSessionDuration": 0.0, "ga_pageviews": 1, "ga_pageviewsPerSession": 1.0, "ga_avgTimeOnPage": 0.0, "ga_bounceRate": 100.0, "ga_exitRate": 100.0, "view_id": "211669975", "isDataGolden": true}, "emitted_at": 1685023943713} +{"stream": "pages", "data": {"ga_date": "2023-05-24", "ga_hostname": "de.surveymonkey.com", "ga_pagePath": 
"/apps/NKI5TOTqk4tS5BZyJXU9YQ_3D_3D/details/", "ga_pageviews": 1, "ga_uniquePageviews": 1, "ga_avgTimeOnPage": 0.0, "ga_entrances": 1, "ga_entranceRate": 100.0, "ga_bounceRate": 100.0, "ga_exits": 1, "ga_exitRate": 100.0, "view_id": "211669975", "isDataGolden": true}, "emitted_at": 1685023951462} +{"stream": "pages", "data": {"ga_date": "2023-05-24", "ga_hostname": "www.surveymonkey.com", "ga_pagePath": "/apps/NKI5TOTqk4tS5BZyJXU9YQ_3D_3D/details/", "ga_pageviews": 3, "ga_uniquePageviews": 3, "ga_avgTimeOnPage": 0.0, "ga_entrances": 3, "ga_entranceRate": 100.0, "ga_bounceRate": 100.0, "ga_exits": 3, "ga_exitRate": 100.0, "view_id": "211669975", "isDataGolden": true}, "emitted_at": 1685023951463} +{"stream": "locations", "data": {"ga_date": "2023-05-24", "ga_continent": "Americas", "ga_subContinent": "Northern America", "ga_country": "United States", "ga_region": "New York", "ga_metro": "New York, NY", "ga_city": "New York", "ga_users": 1, "ga_newUsers": 1, "ga_sessions": 1, "ga_sessionsPerUser": 1.0, "ga_avgSessionDuration": 0.0, "ga_pageviews": 1, "ga_pageviewsPerSession": 1.0, "ga_avgTimeOnPage": 0.0, "ga_bounceRate": 100.0, "ga_exitRate": 100.0, "view_id": "211669975", "isDataGolden": true}, "emitted_at": 1685023959587} +{"stream": "locations", "data": {"ga_date": "2023-05-24", "ga_continent": "Europe", "ga_subContinent": "Western Europe", "ga_country": "Germany", "ga_region": "Hessen", "ga_metro": "(not set)", "ga_city": "Frankfurt", "ga_users": 1, "ga_newUsers": 1, "ga_sessions": 1, "ga_sessionsPerUser": 1.0, "ga_avgSessionDuration": 0.0, "ga_pageviews": 1, "ga_pageviewsPerSession": 1.0, "ga_avgTimeOnPage": 0.0, "ga_bounceRate": 100.0, "ga_exitRate": 100.0, "view_id": "211669975", "isDataGolden": true}, "emitted_at": 1685023959588} +{"stream": "monthly_active_users", "data": {"ga_date": "2023-05-24", "ga_30dayUsers": 32, "view_id": "211669975", "isDataGolden": true}, "emitted_at": 1685023967774} +{"stream": "monthly_active_users", "data": {"ga_date": 
"2023-05-25", "ga_30dayUsers": 32, "view_id": "211669975", "isDataGolden": true}, "emitted_at": 1685023968394} +{"stream": "four_weekly_active_users", "data": {"ga_date": "2023-05-24", "ga_28dayUsers": 30, "view_id": "211669975", "isDataGolden": true}, "emitted_at": 1685023975150} +{"stream": "four_weekly_active_users", "data": {"ga_date": "2023-05-25", "ga_28dayUsers": 28, "view_id": "211669975", "isDataGolden": true}, "emitted_at": 1685023976478} +{"stream": "two_weekly_active_users", "data": {"ga_date": "2023-05-24", "ga_14dayUsers": 17, "view_id": "211669975", "isDataGolden": true}, "emitted_at": 1685023983198} +{"stream": "two_weekly_active_users", "data": {"ga_date": "2023-05-25", "ga_14dayUsers": 16, "view_id": "211669975", "isDataGolden": true}, "emitted_at": 1685023983753} +{"stream": "weekly_active_users", "data": {"ga_date": "2023-05-24", "ga_7dayUsers": 10, "view_id": "211669975", "isDataGolden": true}, "emitted_at": 1685023990571} +{"stream": "weekly_active_users", "data": {"ga_date": "2023-05-25", "ga_7dayUsers": 10, "view_id": "211669975", "isDataGolden": true}, "emitted_at": 1685023991040} +{"stream": "daily_active_users", "data": {"ga_date": "2023-05-23", "ga_1dayUsers": 1, "view_id": "211669975", "isDataGolden": true}, "emitted_at": 1685023998149} +{"stream": "daily_active_users", "data": {"ga_date": "2023-05-24", "ga_1dayUsers": 4, "view_id": "211669975", "isDataGolden": true}, "emitted_at": 1685023998151} +{"stream": "devices", "data": {"ga_date": "2023-05-24", "ga_deviceCategory": "desktop", "ga_operatingSystem": "Macintosh", "ga_browser": "Safari", "ga_users": 2, "ga_newUsers": 2, "ga_sessions": 2, "ga_sessionsPerUser": 1.0, "ga_avgSessionDuration": 0.0, "ga_pageviews": 2, "ga_pageviewsPerSession": 1.0, "ga_avgTimeOnPage": 0.0, "ga_bounceRate": 100.0, "ga_exitRate": 100.0, "view_id": "211669975", "isDataGolden": true}, "emitted_at": 1685024005565} +{"stream": "devices", "data": {"ga_date": "2023-05-24", "ga_deviceCategory": "desktop", 
"ga_operatingSystem": "Windows", "ga_browser": "Chrome", "ga_users": 1, "ga_newUsers": 0, "ga_sessions": 1, "ga_sessionsPerUser": 1.0, "ga_avgSessionDuration": 0.0, "ga_pageviews": 1, "ga_pageviewsPerSession": 1.0, "ga_avgTimeOnPage": 0.0, "ga_bounceRate": 100.0, "ga_exitRate": 100.0, "view_id": "211669975", "isDataGolden": true}, "emitted_at": 1685024005566} +{"stream": "new_users_per_day", "data": {"ga_date": "2023-05-24", "ga_country": "Nigeria", "ga_region": "Lagos", "ga_newUsers": 1, "view_id": "211669975", "isDataGolden": true}, "emitted_at": 1685024012689} +{"stream": "new_users_per_day", "data": {"ga_date": "2023-05-24", "ga_country": "United States", "ga_region": "New York", "ga_newUsers": 1, "view_id": "211669975", "isDataGolden": true}, "emitted_at": 1685024012690} diff --git a/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/integration_tests/invalid_config.json b/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/integration_tests/invalid_config.json new file mode 100644 index 000000000000..0ab7ad4763b3 --- /dev/null +++ b/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/integration_tests/invalid_config.json @@ -0,0 +1,10 @@ +{ + "credentials": { + "auth_type": "Service", + "credentials_json": "None" + }, + "view_id": "211669975", + "start_date": "2021-02-11", + "window_in_days": 1, + "custom_reports": "[{\"name\": \"users_per_day\", \"dimensions\": [\"ga:date\"], \"metrics\": [\"ga:users\", \"ga:newUsers\"]}, {\"name\": \"sessions_per_country_day\", \"dimensions\": [\"ga:date\", \"ga:country\"], \"metrics\": [\"ga:sessions\", \"ga:sessionsPerUser\", \"ga:avgSessionDuration\"]}]" +} diff --git a/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/integration_tests/sample_config.json b/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/integration_tests/sample_config.json new file mode 100644 index 
000000000000..831183f5c793 --- /dev/null +++ b/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/integration_tests/sample_config.json @@ -0,0 +1,6 @@ +{ + "view_id": "1234567", + "start_date": "2021-01-01", + "window_in_days": 1, + "custom_reports": "custom_reports" +} diff --git a/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/integration_tests/sample_state.json b/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/integration_tests/sample_state.json new file mode 100644 index 000000000000..0c8625660e07 --- /dev/null +++ b/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/integration_tests/sample_state.json @@ -0,0 +1,86 @@ +[ + { + "type": "STREAM", + "stream": { + "stream_state": { "ga_date": "2021-02-11" }, + "stream_descriptor": { "name": "website_overview" } + } + }, + { + "type": "STREAM", + "stream": { + "stream_state": { "ga_date": "2021-02-11" }, + "stream_descriptor": { "name": "traffic_sources" } + } + }, + { + "type": "STREAM", + "stream": { + "stream_state": { "ga_date": "2021-02-11" }, + "stream_descriptor": { "name": "pages" } + } + }, + { + "type": "STREAM", + "stream": { + "stream_state": { "ga_date": "2021-02-11" }, + "stream_descriptor": { "name": "locations" } + } + }, + { + "type": "STREAM", + "stream": { + "stream_state": { "ga_date": "2021-02-11" }, + "stream_descriptor": { "name": "monthly_active_users" } + } + }, + { + "type": "STREAM", + "stream": { + "stream_state": { "ga_date": "2021-02-11" }, + "stream_descriptor": { "name": "four_weekly_active_users" } + } + }, + { + "type": "STREAM", + "stream": { + "stream_state": { "ga_date": "2021-02-11" }, + "stream_descriptor": { "name": "two_weekly_active_users" } + } + }, + { + "type": "STREAM", + "stream": { + "stream_state": { "ga_date": "2021-02-11" }, + "stream_descriptor": { "name": "weekly_active_users" } + } + }, + { + "type": "STREAM", + "stream": { + "stream_state": 
{ "ga_date": "2021-02-11" }, + "stream_descriptor": { "name": "daily_active_users" } + } + }, + { + "type": "STREAM", + "stream": { + "stream_state": { "ga_date": "2021-02-11" }, + "stream_descriptor": { "name": "devices" } + } + }, + { + "type": "STREAM", + "stream": { + "stream_state": { "ga_date": "2021-02-11" }, + "stream_descriptor": { "name": "users_per_day" } + } + }, + { + "type": "STREAM", + "stream": { + "stream_state": { "ga_date": "2021-02-11" }, + "stream_descriptor": { "name": "new_users_per_day" } + } + } +] diff --git a/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/main.py b/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/main.py new file mode 100644 index 000000000000..b91a0b49b694 --- /dev/null +++ b/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/main.py @@ -0,0 +1,13 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + + +import sys + +from airbyte_cdk.entrypoint import launch +from source_google_analytics_v4_service_account_only import SourceGoogleAnalyticsV4ServiceAccountOnly + +if __name__ == "__main__": + source = SourceGoogleAnalyticsV4ServiceAccountOnly() + launch(source, sys.argv[1:]) diff --git a/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/metadata.yaml b/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/metadata.yaml new file mode 100644 index 000000000000..79cbba174479 --- /dev/null +++ b/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/metadata.yaml @@ -0,0 +1,32 @@ +data: + ab_internal: + ql: 400 + sl: 100 + allowedHosts: + hosts: + - oauth2.googleapis.com + - www.googleapis.com + - analyticsdata.googleapis.com + - analyticsreporting.googleapis.com + connectorBuildOptions: + baseImage: docker.io/airbyte/python-connector-base:1.2.0@sha256:c22a9d97464b69d6ef01898edf3f8612dc11614f05a84984451dde195f337db9 + connectorSubtype: api + 
connectorType: source + definitionId: 9e28a926-8f3c-4911-982d-a2e1c378b59c + dockerImageTag: 0.0.1 + dockerRepository: airbyte/source-google-analytics-v4-service-account-only + documentationUrl: https://docs.airbyte.com/integrations/sources/google-analytics-v4-service-account-only + githubIssueLabel: source-google-analytics-v4-service-account-only + icon: google-analytics.svg + license: Elv2 + name: Google Analytics (Universal Analytics) + registries: + cloud: + enabled: true + oss: + enabled: true + releaseStage: generally_available + supportLevel: community + tags: + - language:python +metadataSpecVersion: "1.0" diff --git a/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/requirements.txt b/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/requirements.txt new file mode 100644 index 000000000000..d6e1198b1ab1 --- /dev/null +++ b/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/requirements.txt @@ -0,0 +1 @@ +-e . diff --git a/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/setup.py b/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/setup.py new file mode 100644 index 000000000000..c405add054d6 --- /dev/null +++ b/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/setup.py @@ -0,0 +1,35 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+# + +from pathlib import Path + +from setuptools import find_packages, setup + + +def local_dependency(name: str) -> str: + """Returns a path to a local package.""" + return f"{name} @ file://{Path.cwd().parent / name}" + + +MAIN_REQUIREMENTS = ["airbyte-cdk", "PyJWT", "cryptography", "requests", local_dependency("source-google-analytics-v4")] + +TEST_REQUIREMENTS = [ + "pytest~=6.1", + "requests-mock", + "pytest-mock", + "freezegun", +] + +setup( + name="source_google_analytics_v4_service_account_only", + description="Source implementation for Google Analytics V4.", + author="Airbyte", + author_email="contact@airbyte.io", + packages=find_packages(), + install_requires=MAIN_REQUIREMENTS, + package_data={"": ["*.json", "schemas/*.json", "schemas/shared/*.json"]}, + extras_require={ + "tests": TEST_REQUIREMENTS, + }, +) diff --git a/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/source_google_analytics_v4_service_account_only/__init__.py b/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/source_google_analytics_v4_service_account_only/__init__.py new file mode 100644 index 000000000000..d3028f55635d --- /dev/null +++ b/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/source_google_analytics_v4_service_account_only/__init__.py @@ -0,0 +1,28 @@ +# +# MIT License +# +# Copyright (c) 2020 Airbyte +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# + + +from .source import SourceGoogleAnalyticsV4ServiceAccountOnly + +__all__ = ["SourceGoogleAnalyticsV4ServiceAccountOnly"] diff --git a/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/source_google_analytics_v4_service_account_only/source.py b/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/source_google_analytics_v4_service_account_only/source.py new file mode 100644 index 000000000000..af0201aac566 --- /dev/null +++ b/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/source_google_analytics_v4_service_account_only/source.py @@ -0,0 +1,12 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + +import source_google_analytics_v4 + + +class SourceGoogleAnalyticsV4ServiceAccountOnly(source_google_analytics_v4.SourceGoogleAnalyticsV4): + """Service-account-only variant of the default source logic. + This connector is not meant to be used with the OAuth authentication method. + The base logic of this connector is implemented in the "source-google-analytics-v4" connector.
+ """ diff --git a/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/source_google_analytics_v4_service_account_only/spec.json b/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/source_google_analytics_v4_service_account_only/spec.json new file mode 100644 index 000000000000..a4be0d1c2ea8 --- /dev/null +++ b/airbyte-integrations/connectors/source-google-analytics-v4-service-account-only/source_google_analytics_v4_service_account_only/spec.json @@ -0,0 +1,79 @@ +{ + "documentationUrl": "https://docs.airbyte.com/integrations/sources/google-analytics-v4-service-account-only", + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Google Analytics (V4) Spec", + "type": "object", + "required": ["view_id", "start_date"], + "additionalProperties": true, + "properties": { + "credentials": { + "order": 0, + "type": "object", + "title": "Credentials", + "description": "Credentials for the service", + "oneOf": [ + { + "type": "object", + "title": "Service Account Key Authentication", + "required": ["credentials_json"], + "properties": { + "auth_type": { + "type": "string", + "const": "Service", + "order": 0 + }, + "credentials_json": { + "title": "Service Account JSON Key", + "type": "string", + "description": "The JSON key of the service account to use for authorization", + "examples": [ + "{ \"type\": \"service_account\", \"project_id\": YOUR_PROJECT_ID, \"private_key_id\": YOUR_PRIVATE_KEY, ... }" + ], + "airbyte_secret": true + } + } + } + ] + }, + "start_date": { + "order": 1, + "type": "string", + "title": "Replication Start Date", + "description": "The date in the format YYYY-MM-DD. 
Any data before this date will not be replicated.", + "examples": ["2020-06-01"], + "pattern": "^[0-9]{4}-[0-9]{2}-[0-9]{2}$|^$|[\\s\\S]+$", + "format": "date" + }, + "view_id": { + "order": 2, + "type": "string", + "title": "View ID", + "description": "The ID for the Google Analytics View you want to fetch data from. This can be found from the Google Analytics Account Explorer." + }, + "end_date": { + "order": 3, + "type": "string", + "title": "Replication End Date", + "description": "The date in the format YYYY-MM-DD. Any data after this date will not be replicated.", + "examples": ["2020-06-01"], + "pattern": "^[0-9]{4}-[0-9]{2}-[0-9]{2}$|^$|[\\s\\S]+$", + "format": "date" + }, + "custom_reports": { + "order": 4, + "type": "string", + "title": "Custom Reports", + "description": "A JSON array describing the custom reports you want to sync from Google Analytics. See the docs for more information about the exact format you can use to fill out this field." + }, + "window_in_days": { + "type": "integer", + "title": "Data request time increment in days", + "description": "The time increment used by the connector when requesting data from the Google Analytics API. More information is available in the docs. The bigger this value is, the faster the sync will be, but the more likely that sampling will be applied to your data, potentially causing inaccuracies in the returned results. We recommend setting this to 1 unless you have a hard requirement to make the sync faster at the expense of accuracy. The minimum allowed value for this field is 1, and the maximum is 364. 
", + "examples": [30, 60, 90, 120, 200, 364], + "default": 1, + "order": 5 + } + } + } +} diff --git a/airbyte-integrations/connectors/source-google-analytics-v4/metadata.yaml b/airbyte-integrations/connectors/source-google-analytics-v4/metadata.yaml index 921875383f55..0d3f2c4740fd 100644 --- a/airbyte-integrations/connectors/source-google-analytics-v4/metadata.yaml +++ b/airbyte-integrations/connectors/source-google-analytics-v4/metadata.yaml @@ -13,7 +13,7 @@ data: connectorSubtype: api connectorType: source definitionId: eff3616a-f9c3-11eb-9a03-0242ac130003 - dockerImageTag: 0.2.3 + dockerImageTag: 0.2.4 dockerRepository: airbyte/source-google-analytics-v4 documentationUrl: https://docs.airbyte.com/integrations/sources/google-analytics-v4 githubIssueLabel: source-google-analytics-v4 diff --git a/airbyte-integrations/connectors/source-google-analytics-v4/setup.py b/airbyte-integrations/connectors/source-google-analytics-v4/setup.py index c37ee40da749..62ec012a72f9 100644 --- a/airbyte-integrations/connectors/source-google-analytics-v4/setup.py +++ b/airbyte-integrations/connectors/source-google-analytics-v4/setup.py @@ -21,7 +21,7 @@ author_email="contact@airbyte.io", packages=find_packages(), install_requires=MAIN_REQUIREMENTS, - package_data={"": ["*.json", "schemas/*.json", "schemas/shared/*.json"]}, + package_data={"": ["*.json", "defaults/*.json", "schemas/*.json", "schemas/shared/*.json"]}, extras_require={ "tests": TEST_REQUIREMENTS, }, diff --git a/docs/integrations/sources/google-analytics-v4-service-account-only.md b/docs/integrations/sources/google-analytics-v4-service-account-only.md new file mode 100644 index 000000000000..cb670a545a2b --- /dev/null +++ b/docs/integrations/sources/google-analytics-v4-service-account-only.md @@ -0,0 +1,286 @@ +# Google Analytics (Universal Analytics) + + + +This page contains the setup guide and reference information for the Google Analytics (Universal Analytics) source connector. 
+ +This connector supports Universal Analytics properties through the [Reporting API v4](https://developers.google.com/analytics/devguides/reporting/core/v4). + + + +:::caution + +**The Google Analytics (Universal Analytics) connector will be deprecated soon.** + +Google is phasing out Universal Analytics in favor of Google Analytics 4 (GA4). As a consequence, we are deprecating the Google Analytics (Universal Analytics) connector and recommend that you migrate to the [Google Analytics 4 (GA4) connector](https://docs.airbyte.com/integrations/sources/google-analytics-data-api) as soon as possible to ensure your syncs are not affected. + +Due to this deprecation, we will not be accepting new contributions for this source. + +For more information, see ["Universal Analytics is going away"](https://support.google.com/analytics/answer/11583528). + +::: + +:::note + +The Google Analytics Universal Analytics (UA) connector uses the older version of Google Analytics, which has been the standard for tracking website and app user behavior since 2012. + +The [Google Analytics 4 (GA4) connector](https://docs.airbyte.com/integrations/sources/google-analytics-data-api) uses the latest version of Google Analytics, which was introduced in 2020. It offers a new data model that emphasizes events and user properties, rather than pageviews and sessions. This new model allows for more flexible and customizable reporting, as well as more accurate measurement of user behavior across devices and platforms. + +::: + +## Prerequisites + +A Google Cloud account with [Viewer permissions](https://support.google.com/analytics/answer/2884495), and with both the [Google Analytics Reporting API](https://console.developers.google.com/apis/api/analyticsreporting.googleapis.com/overview) and the [Google Analytics API](https://console.developers.google.com/apis/api/analytics.googleapis.com/overview) enabled. + +## Setup guide + + + +**For Airbyte Cloud:** + +1. 
[Log into your Airbyte Cloud](https://cloud.airbyte.com/workspaces) account. +2. In the left navigation bar, click **Sources**. In the top-right corner, click **+ New source**. +3. On the Set up the source page, select **Google Analytics** from the **Source type** dropdown. +4. For Name, enter a name for the Google Analytics connector. +5. Authenticate your Google account via Service Account Key Authentication. + - To authenticate your Google account via Service Account Key Authentication, enter your [Google Cloud service account key](https://cloud.google.com/iam/docs/creating-managing-service-account-keys#creating_service_account_keys) in JSON format. Make sure the Service Account has the Project Viewer permission. +6. Enter the **Replication Start Date** in YYYY-MM-DD format. The data added on and after this date will be replicated. If this field is blank, Airbyte will replicate all data. +7. Enter the [**View ID**](https://ga-dev-tools.appspot.com/account-explorer/) for the Google Analytics View you want to fetch data from. +8. Leave **Data request time increment in days (Optional)** blank or set to 1. For faster syncs, set this value to more than 1 but that might result in the Google Analytics API returning [sampled data](#sampled-data-in-reports), potentially causing inaccuracies in the returned results. The maximum allowed value is 364. + + + + +**For Airbyte Open Source:** + +1. Navigate to the Airbyte Open Source dashboard. +2. Go to the Airbyte UI and click **Sources** and then click **+ New source**. +3. On the Set up the source page, select **Google Analytics** from the **Source type** dropdown. +4. Enter a name for the Google Analytics connector. +5. 
Authenticate your Google account via Service Account Key Authentication: + - To authenticate your Google account via Service Account Key Authentication, enter your [Google Cloud service account key](https://cloud.google.com/iam/docs/creating-managing-service-account-keys#creating_service_account_keys) in JSON format. Use the service account email address to [add a user](https://support.google.com/analytics/answer/1009702) to the Google Analytics view you want to access via the API and grant [Read and Analyze permissions](https://support.google.com/analytics/answer/2884495). +6. Enter the **Replication Start Date** in YYYY-MM-DD format. The data added on and after this date will be replicated. If this field is blank, Airbyte will replicate all data. + +7. Enter the [**View ID**](https://ga-dev-tools.appspot.com/account-explorer/) for the Google Analytics View you want to fetch data from. +8. Optionally, enter a JSON array as a string in the **Custom Reports** field. For details, refer to [Requesting custom reports](#requesting-custom-reports). +9. Leave **Data request time increment in days (Optional)** blank or set to 1. For faster syncs, set this value to more than 1, but note that this might result in the Google Analytics API returning [sampled data](#sampled-data-in-reports), potentially causing inaccuracies in the returned results. The maximum allowed value is 364. 
+ + + +## Supported sync modes + +The Google Analytics source connector supports the following [sync modes](https://docs.airbyte.com/cloud/core-concepts#connection-sync-modes): + +- [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) +- [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) +- [Incremental Sync - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) +- [Incremental Sync - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped) + +:::caution + +You need to add the service account email address on the account level, not the property level. Otherwise, an 403 error will be returned. + +::: + +## Supported streams + +The Google Analytics (Universal Analytics) source connector can sync the following tables: + +| Stream name | Schema | +|:-------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| website_overview | `{"ga_date":"2021-02-11","ga_users":1,"ga_newUsers":0,"ga_sessions":9,"ga_sessionsPerUser":9.0,"ga_avgSessionDuration":28.77777777777778,"ga_pageviews":63,"ga_pageviewsPerSession":7.0,"ga_avgTimeOnPage":4.685185185185185,"ga_bounceRate":0.0,"ga_exitRate":14.285714285714285,"view_id":"211669975"}` | +| traffic_sources | `{"ga_date":"2021-02-11","ga_source":"(direct)","ga_medium":"(none)","ga_socialNetwork":"(not 
set)","ga_users":1,"ga_newUsers":0,"ga_sessions":9,"ga_sessionsPerUser":9.0,"ga_avgSessionDuration":28.77777777777778,"ga_pageviews":63,"ga_pageviewsPerSession":7.0,"ga_avgTimeOnPage":4.685185185185185,"ga_bounceRate":0.0,"ga_exitRate":14.285714285714285,"view_id":"211669975"}` | +| pages | `{"ga_date":"2021-02-11","ga_hostname":"mydemo.com","ga_pagePath":"/home5","ga_pageviews":63,"ga_uniquePageviews":9,"ga_avgTimeOnPage":4.685185185185185,"ga_entrances":9,"ga_entranceRate":14.285714285714285,"ga_bounceRate":0.0,"ga_exits":9,"ga_exitRate":14.285714285714285,"view_id":"211669975"}` | +| locations | `{"ga_date":"2021-02-11","ga_continent":"Americas","ga_subContinent":"Northern America","ga_country":"United States","ga_region":"Iowa","ga_metro":"Des Moines-Ames IA","ga_city":"Des Moines","ga_users":1,"ga_newUsers":0,"ga_sessions":1,"ga_sessionsPerUser":1.0,"ga_avgSessionDuration":29.0,"ga_pageviews":7,"ga_pageviewsPerSession":7.0,"ga_avgTimeOnPage":4.666666666666667,"ga_bounceRate":0.0,"ga_exitRate":14.285714285714285,"view_id":"211669975"}` | +| monthly_active_users | `{"ga_date":"2021-02-11","ga_30dayUsers":1,"view_id":"211669975"}` | +| four_weekly_active_users | `{"ga_date":"2021-02-11","ga_28dayUsers":1,"view_id":"211669975"}` | +| two_weekly_active_users | `{"ga_date":"2021-02-11","ga_14dayUsers":1,"view_id":"211669975"}` | +| weekly_active_users | `{"ga_date":"2021-02-11","ga_7dayUsers":1,"view_id":"211669975"}` | +| daily_active_users | `{"ga_date":"2021-02-11","ga_1dayUsers":1,"view_id":"211669975"}` | +| devices | `{"ga_date":"2021-02-11","ga_deviceCategory":"desktop","ga_operatingSystem":"Macintosh","ga_browser":"Chrome","ga_users":1,"ga_newUsers":0,"ga_sessions":9,"ga_sessionsPerUser":9.0,"ga_avgSessionDuration":28.77777777777778,"ga_pageviews":63,"ga_pageviewsPerSession":7.0,"ga_avgTimeOnPage":4.685185185185185,"ga_bounceRate":0.0,"ga_exitRate":14.285714285714285,"view_id":"211669975"}` | +| Any custom reports | See 
[below](#requesting-custom-reports) for details. | + +Reach out to us on Slack or [create an issue](https://github.com/airbytehq/airbyte/issues) if you need to send custom Google Analytics report data with Airbyte. + +## Rate Limits and Performance Considerations \(Airbyte Open Source\) + +[Analytics Reporting API v4](https://developers.google.com/analytics/devguides/reporting/core/v4/limits-quotas) + +- Number of requests per day per project: 50,000 +- Number of requests per view (profile) per day: 10,000 (cannot be increased) +- Number of requests per 100 seconds per project: 2,000 +- Number of requests per 100 seconds per user per project: 100 (can be increased in Google API Console to 1,000). + +The Google Analytics connector should not run into the "requests per 100 seconds" limitation under normal usage. If you see rate limit issues that are not automatically retried successfully, try increasing the `window_in_days` value and [create an issue](https://github.com/airbytehq/airbyte/issues). + +## Sampled data in reports + +If you are not on the Google Analytics 360 tier, the Google Analytics API may return sampled data if the amount of data in your Google Analytics account exceeds Google's [pre-determined compute thresholds](https://support.google.com/analytics/answer/2637192?hl=en&ref_topic=2601030&visit_id=637868645346124317-2833523666&rd=1#thresholds&zippy=%2Cin-this-article). This means the data returned in the report is an estimate which may have some inaccuracy. This [Google page](https://support.google.com/analytics/answer/2637192) provides a comprehensive overview of how Google applies sampling to your data. + +In order to minimize the chances of sampling being applied to your data, Airbyte makes data requests to Google in one-day increments (the smallest allowed date increment). 
This reduces the amount of data the Google API processes per request, thus minimizing the chances of sampling being applied. The downside of requesting data in one-day increments is that it increases the time it takes to export your Google Analytics data. If sampling is not a concern, you can override this behavior by setting the optional `window_in_days` parameter to the number of days each request should cover. +When sampling occurs, a warning is logged to the sync log. + +## Requesting Custom Reports + +Custom Reports allow for flexibility in the reporting dimensions and metrics to meet your specific use case. Use the [GA4 Query Explorer](https://ga-dev-tools.google/ga4/query-explorer/) to help build your report. To ensure your dimensions and metrics are compatible, you can also refer to the [GA4 Dimensions & Metrics Explorer](https://ga-dev-tools.google/ga4/dimensions-metrics-explorer/). + +A custom report is formatted as: `[{"name": "", "dimensions": ["", ...], "metrics": ["", ...]}]` + +Example of a custom report: +```json +[{ + "name" : "page_views_and_users", + "dimensions" :[ + "ga:date", + "ga:pagePath", + "ga:sessionDefaultChannelGrouping" + ], + "metrics" :[ + "ga:screenPageViews", + "ga:totalUsers" + ] +}] +``` +Multiple custom reports should be entered with a comma separator. Each custom report is created as its own stream. +Example of multiple custom reports: +```json +[ + { + "name" : "page_views_and_users", + "dimensions" :[ + "ga:date", + "ga:pagePath" + ], + "metrics" :[ + "ga:screenPageViews", + "ga:totalUsers" + ] + }, + { + "name" : "sessions_by_region", + "dimensions" :[ + "ga:date", + "ga:region" + ], + "metrics" :[ + "ga:totalUsers", + "ga:sessions" + ] + } +] +``` + +Custom reports can also include segments and filters to pull a subset of your data. 
The report should be formatted as: +```json +[ + { + "name": "", + "dimensions": ["", ...], + "metrics": ["", ...], + "segments": ["", ...], + "filter": "" + } +] +``` + +* When using segments, make sure you also add the `ga:segment` dimension. + +Example of a custom report with segments and/or filters: +```json +[{ "name" : "page_views_and_users", + "dimensions" :[ + "ga:date", + "ga:pagePath", + "ga:segment" + ], + "metrics" :[ + "ga:sessions", + "ga:totalUsers" + ], + "segments" :[ + "ga:sessionSource!=(direct)" + ], + "filter" :[ + "ga:sessionSource!=(direct);ga:sessionSource!=(not set)" + ] +}] +``` + +To create a list of dimensions, you can use default Google Analytics dimensions (listed below) or custom dimensions if you have some defined. Each report can contain no more than 7 dimensions, and they must all be unique. The default Google Analytics dimensions are: + +- `ga:browser` +- `ga:city` +- `ga:continent` +- `ga:country` +- `ga:date` +- `ga:deviceCategory` +- `ga:hostname` +- `ga:medium` +- `ga:metro` +- `ga:operatingSystem` +- `ga:pagePath` +- `ga:region` +- `ga:socialNetwork` +- `ga:source` +- `ga:subContinent` + +To create a list of metrics, use a default Google Analytics metric (values from the list below) or custom metrics if you have defined them. +A custom report can contain no more than 10 unique metrics. The default available Google Analytics metrics are: + +- `ga:14dayUsers` +- `ga:1dayUsers` +- `ga:28dayUsers` +- `ga:30dayUsers` +- `ga:7dayUsers` +- `ga:avgSessionDuration` +- `ga:avgTimeOnPage` +- `ga:bounceRate` +- `ga:entranceRate` +- `ga:entrances` +- `ga:exitRate` +- `ga:exits` +- `ga:newUsers` +- `ga:pageviews` +- `ga:pageviewsPerSession` +- `ga:sessions` +- `ga:sessionsPerUser` +- `ga:uniquePageviews` +- `ga:users` + +Incremental sync is supported only if you add `ga:date` dimension to your custom report. + +## Limitations & Troubleshooting + +
+ +Expand to see details about Google Analytics v4 connector limitations and troubleshooting. + + +### Connector limitations + +#### Rate limiting + +[Analytics Reporting API v4](https://developers.google.com/analytics/devguides/reporting/core/v4/limits-quotas) + +- Number of requests per day per project: 50,000 +- Number of requests per view (profile) per day: 10,000 (cannot be increased) +- Number of requests per 100 seconds per project: 2,000 +- Number of requests per 100 seconds per user per project: 100 (can be increased in Google API Console to 1,000). + +The Google Analytics connector should not run into the "requests per 100 seconds" limitation under normal usage. [Create an issue](https://github.com/airbytehq/airbyte/issues) if you see any rate limit issues that are not automatically retried successfully and try increasing the `window_in_days` value. + +### Troubleshooting + + + +* Check out common troubleshooting issues for the Google Analytics v4 source connector on our [Airbyte Forum](https://github.com/airbytehq/airbyte/discussions). + +
+ +## Changelog + +| Version | Date | Pull Request | Subject | +|:--------|:-----------|:----------------------------------------------------------|:----------------| +| 0.0.1 | 2024-01-22 | [34323](https://github.com/airbytehq/airbyte/pull/34323) | Initial Release | + +
\ No newline at end of file diff --git a/docs/integrations/sources/google-analytics-v4.md b/docs/integrations/sources/google-analytics-v4.md index 48b65cbae456..4b26dc27a15d 100644 --- a/docs/integrations/sources/google-analytics-v4.md +++ b/docs/integrations/sources/google-analytics-v4.md @@ -283,6 +283,7 @@ The Google Analytics connector should not run into the "requests per 100 seconds | Version | Date | Pull Request | Subject | |:--------|:-----------|:---------------------------------------------------------|:---------------------------------------------------------------------------------------------| +| 0.2.4 | 2024-01-22 | [34323](https://github.com/airbytehq/airbyte/pull/34323) | Update setup dependencies | | 0.2.3 | 2024-01-18 | [34353](https://github.com/airbytehq/airbyte/pull/34353) | Add End date option | | 0.2.2 | 2023-10-19 | [31599](https://github.com/airbytehq/airbyte/pull/31599) | Base image migration: remove Dockerfile and use the python-connector-base image | | 0.2.1 | 2023-07-11 | [28149](https://github.com/airbytehq/airbyte/pull/28149) | Specify date format to support datepicker in UI |