Skip to content

Commit

Permalink
🐛 Source Harvest: Improve HTTP Availability (#35541)
Browse files Browse the repository at this point in the history
  • Loading branch information
maxi297 authored Feb 26, 2024
1 parent f509404 commit 5ff133f
Show file tree
Hide file tree
Showing 10 changed files with 158 additions and 85 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ data:
connectorSubtype: api
connectorType: source
definitionId: fe2b4084-3386-4d3b-9ad6-308f61a6f1e6
dockerImageTag: 0.1.23
dockerImageTag: 0.1.24
dockerRepository: airbyte/source-harvest
documentationUrl: https://docs.airbyte.com/integrations/sources/harvest
githubIssueLabel: source-harvest
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ requires = [ "poetry-core>=1.0.0",]
build-backend = "poetry.core.masonry.api"

[tool.poetry]
version = "0.1.22"
version = "0.1.24"
name = "source-harvest"
description = "Source implementation for Harvest."
authors = [ "Airbyte <contact@airbyte.io>",]
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.

import logging
from typing import Dict

import requests
from airbyte_cdk.sources import Source
from airbyte_cdk.sources.streams import Stream
from airbyte_cdk.sources.streams.http.availability_strategy import HttpAvailabilityStrategy
from requests import HTTPError


class HarvestAvailabilityStrategy(HttpAvailabilityStrategy):
    """
    Availability strategy for Harvest that supplies the explanation attached to
    each HTTP status code handled as "unavailable".

    This class is tested as part of test_source.check_connection
    """

    def reasons_for_unavailable_status_codes(
        self, stream: Stream, logger: logging.Logger, source: Source, error: HTTPError
    ) -> Dict[int, str]:
        """
        Return the explanation associated with each handled HTTP status code.

        None of the arguments are consulted: the same mapping is returned for
        every stream and every error.

        :return: explanations keyed by status code (unauthorized, forbidden, not found)
        """
        return {
            requests.codes.UNAUTHORIZED: "Please ensure your credentials are valid.",
            requests.codes.FORBIDDEN: "This is most likely due to insufficient permissions on the credentials in use.",
            requests.codes.NOT_FOUND: "Please ensure that your account ID is properly set. If it is the case and you are still seeing this error, please contact Airbyte support.",
        }
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#


from typing import Any, List, Mapping, Tuple
import logging
from typing import Any, List, Mapping, Optional, Tuple

import pendulum
from airbyte_cdk.logger import AirbyteLogger
from airbyte_cdk.models import SyncMode
from airbyte_cdk.sources import AbstractSource
from airbyte_cdk.sources.streams import Stream
from airbyte_cdk.utils import AirbyteTracedException
from airbyte_protocol.models import FailureType
from source_harvest.availability_strategy import HarvestAvailabilityStrategy
from source_harvest.streams import (
BillableRates,
Clients,
Expand Down Expand Up @@ -53,6 +54,11 @@ class SourceHarvest(AbstractSource):
def get_authenticator(config):
credentials = config.get("credentials", {})
if credentials and "client_id" in credentials:
if "account_id" not in config:
raise AirbyteTracedException(
"Config validation error: 'account_id' is a required property",
failure_type=FailureType.config_error,
)
return HarvestOauth2Authenticator(
token_refresh_endpoint="https://id.getharvest.com/api/v2/oauth2/token",
client_id=credentials.get("client_id"),
Expand All @@ -63,20 +69,17 @@ def get_authenticator(config):

api_token = credentials.get("api_token", config.get("api_token"))
if not api_token:
raise Exception("Config validation error: 'api_token' is a required property")
raise AirbyteTracedException(
"Config validation error: 'api_token' is a required property",
failure_type=FailureType.config_error,
)
return HarvestTokenAuthenticator(token=api_token, account_id=config["account_id"])

def check_connection(self, logger: AirbyteLogger, config: Mapping[str, Any]) -> Tuple[bool, Any]:
    """
    Check the connection by reading one record from the Users stream.

    Any exception raised while building the authenticator, parsing the start
    date or fetching the record is caught and reported as a failure.

    :return: (True, None) on success, otherwise (False, message) where the message embeds the repr of the error
    """
    try:
        users_stream = Users(
            authenticator=self.get_authenticator(config),
            replication_start_date=pendulum.parse(config["replication_start_date"]),
        )
        # One record is enough; only the fact that it could be fetched matters.
        next(users_stream.read_records(sync_mode=SyncMode.full_refresh))
    except Exception as error:
        return False, f"Unable to connect to Harvest API with the provided credentials - {repr(error)}"
    return True, None
def check_connection(self, logger: logging.Logger, config: Mapping[str, Any]) -> Tuple[bool, Optional[str]]:
    """
    Check the connection by delegating to HarvestAvailabilityStrategy on the Users stream.

    Nothing is caught here: errors raised while building the authenticator or
    parsing the start date propagate to the caller.

    :return: the result of HarvestAvailabilityStrategy.check_availability for the Users stream
    """
    users_stream = Users(
        authenticator=self.get_authenticator(config),
        replication_start_date=pendulum.parse(config["replication_start_date"]),
    )
    availability_strategy = HarvestAvailabilityStrategy()
    return availability_strategy.check_availability(users_stream, logger, self)

def streams(self, config: Mapping[str, Any]) -> List[Stream]:
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,9 @@ def with_api_token(self, api_token: str) -> "ConfigBuilder":
self._config["credentials"]["api_token"] = api_token
return self

def with_client_id(self, client_id: str) -> "ConfigBuilder":
    """Store `client_id` under the "credentials" entry of the config being built and return this builder."""
    credentials = self._config["credentials"]
    credentials["client_id"] = client_id
    return self

def build(self) -> Dict[str, Any]:
    """Return the config assembled so far (the builder's own dict, not a copy)."""
    config = self._config
    return config
Original file line number Diff line number Diff line change
@@ -1,24 +1,22 @@
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.

from datetime import datetime, timedelta, timezone
from datetime import datetime
from typing import Any, Dict, Optional
from unittest import TestCase

from airbyte_cdk.sources.source import TState
from airbyte_cdk.test.catalog_builder import CatalogBuilder
from airbyte_cdk.test.entrypoint_wrapper import EntrypointOutput, read
from airbyte_cdk.test.mock_http import HttpMocker, HttpRequest, HttpResponse
from airbyte_cdk.test.mock_http import HttpMocker, HttpRequest
from airbyte_cdk.test.mock_http.response_builder import (
FieldPath,
HttpResponseBuilder,
NestedPath,
RecordBuilder,
create_record_builder,
create_response_builder,
find_template,
)
from airbyte_protocol.models import ConfiguredAirbyteCatalog, FailureType, SyncMode
from integration.config import ConfigBuilder
from airbyte_protocol.models import SyncMode
from config import ConfigBuilder
from source_harvest import SourceHarvest

_A_REPLICATION_START_DATE = "2021-01-01T00:00:00+00:00"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,24 +1,22 @@
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.

from datetime import datetime, timedelta, timezone
from datetime import datetime
from typing import Any, Dict, Optional
from unittest import TestCase

from airbyte_cdk.sources.source import TState
from airbyte_cdk.test.catalog_builder import CatalogBuilder
from airbyte_cdk.test.entrypoint_wrapper import EntrypointOutput, read
from airbyte_cdk.test.mock_http import HttpMocker, HttpRequest, HttpResponse
from airbyte_cdk.test.mock_http.response_builder import (
FieldPath,
HttpResponseBuilder,
NestedPath,
RecordBuilder,
create_record_builder,
create_response_builder,
find_template,
)
from airbyte_protocol.models import ConfiguredAirbyteCatalog, FailureType, SyncMode
from integration.config import ConfigBuilder
from airbyte_protocol.models import SyncMode
from config import ConfigBuilder
from source_harvest import SourceHarvest

_A_REPLICATION_START_DATE = "2021-01-01T00:00:00+00:00"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.

import unittest
from unittest.mock import Mock, patch

import pytest
import requests
from airbyte_cdk import AirbyteLogger
from airbyte_cdk.models import FailureType
from airbyte_cdk.utils import AirbyteTracedException
from config import ConfigBuilder
from requests import HTTPError
from source_harvest.source import SourceHarvest


def _a_response(status_code: int) -> requests.Response:
    """Build a mocked requests.Response carrying `status_code` plus placeholder url and reason values."""
    return Mock(spec=requests.Response, status_code=status_code, url="any url", reason="any reason")


class SourceTest(unittest.TestCase):
    """Unit tests for SourceHarvest.check_connection: config validation and HTTP error handling."""

    def setUp(self) -> None:
        self._source = SourceHarvest()
        self._logger = Mock(spec=AirbyteLogger)
        self._config = ConfigBuilder().build()

    def test_given_config_with_client_id_without_account_id_when_check_connection_then_raise_config_error(self) -> None:
        config = ConfigBuilder().with_client_id("a client id").build()
        config.pop("account_id")

        with pytest.raises(AirbyteTracedException) as exception_trace:
            self._source.check_connection(self._logger, config)

        # Checked after the `with` block: a statement placed after the raising call inside it would never run.
        assert exception_trace.value.failure_type == FailureType.config_error

    def test_given_config_no_authentication_in_config_when_check_connection_then_raise_config_error(self) -> None:
        config = ConfigBuilder().build()
        config["credentials"].pop("api_token", None)
        config["credentials"].pop("client_id", None)

        with pytest.raises(AirbyteTracedException) as exception_trace:
            self._source.check_connection(self._logger, config)

        # Checked after the `with` block: a statement placed after the raising call inside it would never run.
        assert exception_trace.value.failure_type == FailureType.config_error

    @patch("source_harvest.source.Users.read_records")
    def test_given_400_http_error_when_check_connection_then_raise_non_config_error(self, mocked_user_read_records) -> None:
        """
        Following https://github.com/airbytehq/airbyte/pull/35305 where no page alerts were emitted

        A 400 from the API must surface as an error that is not flagged as a config error.
        """
        mocked_user_read_records.side_effect = HTTPError(response=_a_response(400))

        with pytest.raises(Exception) as exception:
            self._source.check_connection(self._logger, self._config)

        # `exception` is pytest's ExceptionInfo wrapper; the error that was actually raised is `.value`.
        # Inspecting the wrapper itself would make the isinstance check always False and the assertion vacuous.
        error = exception.value
        assert not isinstance(error, AirbyteTracedException) or error.failure_type != FailureType.config_error

    @patch("source_harvest.source.Users.read_records")
    def test_given_401_http_error_when_check_connection_then_is_not_available(self, mocked_user_read_records) -> None:
        mocked_user_read_records.side_effect = HTTPError(response=_a_response(401))
        is_available, _ = self._source.check_connection(self._logger, self._config)
        assert not is_available

    @patch("source_harvest.source.Users.read_records")
    def test_given_403_http_error_when_check_connection_then_is_not_available(self, mocked_user_read_records) -> None:
        mocked_user_read_records.side_effect = HTTPError(response=_a_response(403))
        is_available, _ = self._source.check_connection(self._logger, self._config)
        assert not is_available

    @patch("source_harvest.source.Users.read_records")
    def test_given_404_http_error_when_check_connection_then_is_not_available(self, mocked_user_read_records) -> None:
        mocked_user_read_records.side_effect = HTTPError(response=_a_response(404))
        is_available, _ = self._source.check_connection(self._logger, self._config)
        assert not is_available
Original file line number Diff line number Diff line change
Expand Up @@ -11,38 +11,6 @@
logger = AirbyteLogger()


def test_check_connection_ok(config, mock_stream):
    """check_connection reports success and no message when the "users" stream is mocked with a record."""
    mock_stream("users", response={"users": [{"id": 1}], "next_page": 2})

    succeeded, message = SourceHarvest().check_connection(logger, config=config)

    assert succeeded
    assert not message


def test_check_connection_empty_config(config):
    """check_connection reports failure with a message when given an empty config."""
    # The fixture value is discarded in favour of an empty mapping.
    config = {}

    succeeded, message = SourceHarvest().check_connection(logger, config=config)

    assert not succeeded
    assert message


def test_check_connection_invalid_config(config):
    """check_connection reports failure with a message when "replication_start_date" is missing."""
    config.pop("replication_start_date")

    succeeded, message = SourceHarvest().check_connection(logger, config=config)

    assert not succeeded
    assert message


def test_check_connection_exception(config):
    """check_connection reports failure with a message when no stream response has been mocked."""
    succeeded, message = SourceHarvest().check_connection(logger, config=config)

    assert not succeeded
    assert message


def test_streams(config):
streams = SourceHarvest().streams(config)

Expand Down
49 changes: 25 additions & 24 deletions docs/integrations/sources/harvest.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,28 +79,29 @@ The connector is restricted by the [Harvest rate limits](https://help.getharvest

## Changelog

| Version | Date | Pull Request | Subject |
|:--------|:-----------|:---------------------------------------------------------| :-------------------------------------------------------------------------------- |
| 0.1.23 | 2024-02-19 | [35305](https://github.com/airbytehq/airbyte/pull/35305) | Fix pendulum parsing error |
| 0.1.22 | 2024-02-12 | [35154](https://github.com/airbytehq/airbyte/pull/35154) | Manage dependencies with Poetry. |
| 0.1.21 | 2023-11-30 | [33003](https://github.com/airbytehq/airbyte/pull/33003) | Update expected records |
| 0.1.20 | 2023-10-19 | [31599](https://github.com/airbytehq/airbyte/pull/31599) | Base image migration: remove Dockerfile and use the python-connector-base image |
| 0.1.19 | 2023-07-26 | [28755](https://github.com/airbytehq/airbyte/pull/28755) | Changed parameters for Time Reports to use 365 days as opposed to 1 year |
| 0.1.18 | 2023-05-29 | [26714](https://github.com/airbytehq/airbyte/pull/26714) | Remove `authSpecification` from spec in favour of `advancedAuth` |
| 0.1.17 | 2023-03-03 | [22983](https://github.com/airbytehq/airbyte/pull/22983) | Specified date formatting in specification |
| 0.1.16 | 2023-02-07 | [22417](https://github.com/airbytehq/airbyte/pull/22417) | Turn on default HttpAvailabilityStrategy |
| 0.1.15 | 2023-01-27 | [22008](https://github.com/airbytehq/airbyte/pull/22008) | Set `AvailabilityStrategy` for streams explicitly to `None` |
| 0.1.14 | 2023-01-09 | [21151](https://github.com/airbytehq/airbyte/pull/21151) | Skip 403 FORBIDDEN for all stream |
| 0.1.13 | 2022-12-22 | [20810](https://github.com/airbytehq/airbyte/pull/20810) | Skip 403 FORBIDDEN for `EstimateItemCategories` stream |
| 0.1.12 | 2022-12-16 | [20572](https://github.com/airbytehq/airbyte/pull/20572) | Introduce replication end date |
| 0.1.11 | 2022-09-28 | [17326](https://github.com/airbytehq/airbyte/pull/17326) | Migrate to per-stream states. |
| 0.1.10 | 2022-08-08 | [15221](https://github.com/airbytehq/airbyte/pull/15221) | Added `parent_id` for all streams which have parent stream |
| Version | Date | Pull Request | Subject |
|:--------|:-----------|:---------------------------------------------------------|:-----------------------------------------------------------------------------------|
| 0.1.24 | 2024-02-26 | [35541](https://github.com/airbytehq/airbyte/pull/35541) | Improve check command to avoid missing alerts |
| 0.1.23 | 2024-02-19 | [35305](https://github.com/airbytehq/airbyte/pull/35305) | Fix pendulum parsing error |
| 0.1.22 | 2024-02-12 | [35154](https://github.com/airbytehq/airbyte/pull/35154) | Manage dependencies with Poetry. |
| 0.1.21 | 2023-11-30 | [33003](https://github.com/airbytehq/airbyte/pull/33003) | Update expected records |
| 0.1.20 | 2023-10-19 | [31599](https://github.com/airbytehq/airbyte/pull/31599) | Base image migration: remove Dockerfile and use the python-connector-base image |
| 0.1.19 | 2023-07-26 | [28755](https://github.com/airbytehq/airbyte/pull/28755) | Changed parameters for Time Reports to use 365 days as opposed to 1 year |
| 0.1.18 | 2023-05-29 | [26714](https://github.com/airbytehq/airbyte/pull/26714) | Remove `authSpecification` from spec in favour of `advancedAuth` |
| 0.1.17 | 2023-03-03 | [22983](https://github.com/airbytehq/airbyte/pull/22983) | Specified date formatting in specification |
| 0.1.16 | 2023-02-07 | [22417](https://github.com/airbytehq/airbyte/pull/22417) | Turn on default HttpAvailabilityStrategy |
| 0.1.15 | 2023-01-27 | [22008](https://github.com/airbytehq/airbyte/pull/22008) | Set `AvailabilityStrategy` for streams explicitly to `None` |
| 0.1.14 | 2023-01-09 | [21151](https://github.com/airbytehq/airbyte/pull/21151) | Skip 403 FORBIDDEN for all stream |
| 0.1.13 | 2022-12-22 | [20810](https://github.com/airbytehq/airbyte/pull/20810) | Skip 403 FORBIDDEN for `EstimateItemCategories` stream |
| 0.1.12 | 2022-12-16 | [20572](https://github.com/airbytehq/airbyte/pull/20572) | Introduce replication end date |
| 0.1.11 | 2022-09-28 | [17326](https://github.com/airbytehq/airbyte/pull/17326) | Migrate to per-stream states. |
| 0.1.10 | 2022-08-08 | [15221](https://github.com/airbytehq/airbyte/pull/15221) | Added `parent_id` for all streams which have parent stream |
| 0.1.9 | 2022-08-04 | [15312](https://github.com/airbytehq/airbyte/pull/15312) | Fix `started_time` and `ended_time` format schema error and updated report slicing |
| 0.1.8 | 2021-12-14 | [8429](https://github.com/airbytehq/airbyte/pull/8429) | Update titles and descriptions |
| 0.1.6 | 2021-11-14 | [7952](https://github.com/airbytehq/airbyte/pull/7952) | Implement OAuth 2.0 support |
| 0.1.5 | 2021-09-28 | [5747](https://github.com/airbytehq/airbyte/pull/5747) | Update schema date-time fields |
| 0.1.4 | 2021-06-22 | [5701](https://github.com/airbytehq/airbyte/pull/5071) | Harvest normalization failure: fixing the schemas |
| 0.1.3 | 2021-06-22 | [4274](https://github.com/airbytehq/airbyte/pull/4274) | Fix wrong data type on `statement_key` in `clients` stream |
| 0.1.2 | 2021-06-07 | [4222](https://github.com/airbytehq/airbyte/pull/4222) | Correct specification parameter name |
| 0.1.1 | 2021-06-09 | [3973](https://github.com/airbytehq/airbyte/pull/3973) | Add `AIRBYTE_ENTRYPOINT` for Kubernetes support |
| 0.1.0 | 2021-06-07 | [3709](https://github.com/airbytehq/airbyte/pull/3709) | Release Harvest connector! |
| 0.1.8 | 2021-12-14 | [8429](https://github.com/airbytehq/airbyte/pull/8429) | Update titles and descriptions |
| 0.1.6 | 2021-11-14 | [7952](https://github.com/airbytehq/airbyte/pull/7952) | Implement OAuth 2.0 support |
| 0.1.5 | 2021-09-28 | [5747](https://github.com/airbytehq/airbyte/pull/5747) | Update schema date-time fields |
| 0.1.4 | 2021-06-22 | [5701](https://github.com/airbytehq/airbyte/pull/5071) | Harvest normalization failure: fixing the schemas |
| 0.1.3 | 2021-06-22 | [4274](https://github.com/airbytehq/airbyte/pull/4274) | Fix wrong data type on `statement_key` in `clients` stream |
| 0.1.2 | 2021-06-07 | [4222](https://github.com/airbytehq/airbyte/pull/4222) | Correct specification parameter name |
| 0.1.1 | 2021-06-09 | [3973](https://github.com/airbytehq/airbyte/pull/3973) | Add `AIRBYTE_ENTRYPOINT` for Kubernetes support |
| 0.1.0 | 2021-06-07 | [3709](https://github.com/airbytehq/airbyte/pull/3709) | Release Harvest connector! |

0 comments on commit 5ff133f

Please sign in to comment.