Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🎉 Source Github: Add MultipleTokenAuthenticator #5223

Merged
merged 18 commits into from
Aug 19, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions airbyte-cdk/python/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# Changelog

## 0.1.10
Add `MultipleTokenAuthenticator` class to allow cycling through a list of API tokens when making HTTP requests

## 0.1.8
Allow to fetch primary key info from singer catalog

Expand Down
2 changes: 1 addition & 1 deletion airbyte-cdk/python/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ All tests are located in the `unit_tests` directory. Run `pytest --cov=airbyte_c

1. Bump the package version in `setup.py`
2. Open a PR
3. An Airbyte member must comment `/publish-cdk --dry-run=<true or false>`. Dry runs publish to test.pypi.org.
3. An Airbyte member must comment `/publish-cdk dry-run=true` to publish the package to test.pypi.org or `/publish-cdk dry-run=false` to publish it to the real index of pypi.org.

## Coming Soon

Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
# Initialize Auth Package
from .core import HttpAuthenticator, NoAuth
from .oauth import Oauth2Authenticator
from .token import TokenAuthenticator
from .token import MultipleTokenAuthenticator, TokenAuthenticator

__all__ = [
"HttpAuthenticator",
"NoAuth",
"Oauth2Authenticator",
"TokenAuthenticator",
"MultipleTokenAuthenticator",
]
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@
#


from typing import Any, Mapping
from itertools import cycle
from typing import Any, List, Mapping

from .core import HttpAuthenticator

Expand All @@ -36,3 +37,14 @@ def __init__(self, token: str, auth_method: str = "Bearer", auth_header: str = "

def get_auth_header(self) -> Mapping[str, Any]:
return {self.auth_header: f"{self.auth_method} {self._token}"}


class MultipleTokenAuthenticator(HttpAuthenticator):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could you add a doc string explaining what this does e.g:

Uses the input list of tokens for authentication in a round-robin fashion. This allows load balancing quota consumption across multiple tokens. 

def __init__(self, tokens: List[str], auth_method: str = "Bearer", auth_header: str = "Authorization"):
self.auth_method = auth_method
self.auth_header = auth_header
self._tokens = tokens
self._tokens_iter = cycle(self._tokens)

def get_auth_header(self) -> Mapping[str, Any]:
return {self.auth_header: f"{self.auth_method} {next(self._tokens_iter)}"}
2 changes: 1 addition & 1 deletion airbyte-cdk/python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@

setup(
name="airbyte-cdk",
version="0.1.8",
version="0.1.10",
description="A framework for writing Airbyte Connectors.",
long_description=README,
long_description_content_type="text/markdown",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
import logging

import requests
from airbyte_cdk.sources.streams.http.auth import NoAuth, Oauth2Authenticator, TokenAuthenticator
from airbyte_cdk.sources.streams.http.auth import MultipleTokenAuthenticator, NoAuth, Oauth2Authenticator, TokenAuthenticator
from requests import Response

LOGGER = logging.getLogger(__name__)
Expand All @@ -43,6 +43,16 @@ def test_token_authenticator():
assert {"Authorization": "Bearer test-token"} == header


def test_multiple_token_authenticator():
token = MultipleTokenAuthenticator(["token1", "token2"])
header1 = token.get_auth_header()
assert {"Authorization": "Bearer token1"} == header1
header2 = token.get_auth_header()
assert {"Authorization": "Bearer token2"} == header2
header3 = token.get_auth_header()
assert {"Authorization": "Bearer token1"} == header3


def test_no_auth():
"""
Should always return empty body, no matter how many times token is retrieved.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@

@pytest.fixture(scope="session", autouse=True)
def connector_setup():
""" This fixture is a placeholder for external resources that acceptance test might require."""
"""This fixture is a placeholder for external resources that acceptance test might require."""
# TODO: setup test dependencies if needed. otherwise remove the TODO comments
yield
# TODO: clean up test dependencies
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
from airbyte_cdk.models import SyncMode
from airbyte_cdk.sources import AbstractSource
from airbyte_cdk.sources.streams import Stream
from airbyte_cdk.sources.streams.http.auth import TokenAuthenticator
from airbyte_cdk.sources.streams.http.auth import MultipleTokenAuthenticator

from .streams import (
Assignees,
Expand All @@ -53,8 +53,12 @@
)


TOKEN_SEPARATOR = ","


class SourceGithub(AbstractSource):
def _generate_repositories(self, config: Mapping[str, Any], authenticator: TokenAuthenticator) -> List[str]:
@staticmethod
def _generate_repositories(config: Mapping[str, Any], authenticator: MultipleTokenAuthenticator) -> List[str]:
repositories = list(filter(None, config["repository"].split(" ")))

if not repositories:
Expand All @@ -65,27 +69,35 @@ def _generate_repositories(self, config: Mapping[str, Any], authenticator: Token
if organizations:
repos = Repositories(authenticator=authenticator, organizations=organizations)
for stream in repos.stream_slices(sync_mode=SyncMode.full_refresh):
repositories_list += [
repository["full_name"] for repository in repos.read_records(sync_mode=SyncMode.full_refresh, stream_slice=stream)
]
repositories_list += [r["full_name"] for r in repos.read_records(sync_mode=SyncMode.full_refresh, stream_slice=stream)]

return list(set(repositories_list))

@staticmethod
def _get_authenticator(token: str):
tokens = [t.strip() for t in token.split(TOKEN_SEPARATOR)]
return MultipleTokenAuthenticator(tokens=tokens, auth_method="token")

def check_connection(self, logger: AirbyteLogger, config: Mapping[str, Any]) -> Tuple[bool, Any]:
try:
authenticator = TokenAuthenticator(token=config["access_token"], auth_method="token")
authenticator = self._get_authenticator(config["access_token"])
repositories = self._generate_repositories(config=config, authenticator=authenticator)

# We should use the most poorly filled stream to use the `list` method, because when using the `next` method, we can get the `StopIteration` error.
projects_stream = Projects(authenticator=authenticator, repositories=repositories, start_date=config["start_date"])
# We should use the most poorly filled stream to use the `list` method,
# because when using the `next` method, we can get the `StopIteration` error.
projects_stream = Projects(
authenticator=authenticator,
repositories=repositories,
start_date=config["start_date"],
)
for stream in projects_stream.stream_slices(sync_mode=SyncMode.full_refresh):
list(projects_stream.read_records(sync_mode=SyncMode.full_refresh, stream_slice=stream))
return True, None
except Exception as e:
return False, repr(e)

def streams(self, config: Mapping[str, Any]) -> List[Stream]:
authenticator = TokenAuthenticator(token=config["access_token"], auth_method="token")
authenticator = self._get_authenticator(config["access_token"])
repositories = self._generate_repositories(config=config, authenticator=authenticator)
full_refresh_args = {"authenticator": authenticator, "repositories": repositories}
incremental_args = {**full_refresh_args, "start_date": config["start_date"]}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
"properties": {
"access_token": {
"type": "string",
gaart marked this conversation as resolved.
Show resolved Hide resolved
"description": "Log into Github and then generate a <a href=\"https://github.com/settings/tokens\"> personal access token</a>.",
"title": "Access Tokens",
"description": "Log into Github and then generate a <a href=\"https://github.com/settings/tokens\"> personal access token</a>. To load balance your API quota consumption across multiple API tokens, input multiple tokens separated with \",\"",
"airbyte_secret": true
},
"repository": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,17 @@
# SOFTWARE.
#

from airbyte_cdk.sources.streams.http.auth import MultipleTokenAuthenticator
from source_github import SourceGithub

def test_example_method():
assert True

def test_single_token():
authenticator = SourceGithub._get_authenticator("123")
assert isinstance(authenticator, MultipleTokenAuthenticator)
assert ["123"] == authenticator._tokens


def test_multiple_tokens():
authenticator = SourceGithub._get_authenticator("123, 456")
assert isinstance(authenticator, MultipleTokenAuthenticator)
assert ["123", "456"] == authenticator._tokens
2 changes: 1 addition & 1 deletion docs/connector-development/cdk-python/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ All tests are located in the `unit_tests` directory. Run `pytest --cov=airbyte_c

1. Bump the package version in `setup.py`
2. Open a PR
3. An Airbyte member must comment `/publish-cdk --dry-run=<true or false>`. Dry runs publish to test.pypi.org.
3. An Airbyte member must comment `/publish-cdk dry-run=true` to publish the package to test.pypi.org or `/publish-cdk dry-run=false` to publish it to the real index of pypi.org.

## Coming Soon

Expand Down