diff --git a/airbyte-lib/README.md b/airbyte-lib/README.md index c65e5fba04fc..3fb34c0d517b 100644 --- a/airbyte-lib/README.md +++ b/airbyte-lib/README.md @@ -4,17 +4,44 @@ airbyte-lib is a library that allows to run Airbyte syncs embedded into any Pyth ## Development -* Make sure [Poetry is installed](https://python-poetry.org/docs/#). -* Run `poetry install` -* For examples, check out the `examples` folder. They can be run via `poetry run python examples/` -* Unit tests and type checks can be run via `poetry run pytest` +- Make sure [Poetry is installed](https://python-poetry.org/docs/#). +- Run `poetry install` +- For examples, check out the `examples` folder. They can be run via `poetry run python examples/` +- Unit tests and type checks can be run via `poetry run pytest` ## Release -* In your PR: - * Bump the version in `pyproject.toml` - * Add a changelog entry to the table below -* Once the PR is merged, go to Github and trigger the `Publish AirbyteLib Manually` workflow. This will publish the new version to PyPI. +- In your PR: + - Bump the version in `pyproject.toml` + - Add a changelog entry to the table below +- Once the PR is merged, go to Github and trigger the `Publish AirbyteLib Manually` workflow. This will publish the new version to PyPI. + +## Secrets Management + +AirbyteLib can auto-import secrets from the following sources: + +1. Environment variables. +2. [Google Colab secrets](https://medium.com/@parthdasawant/how-to-use-secrets-in-google-colab-450c38e3ec75). +3. Manual entry via [`getpass`](https://docs.python.org/3.9/library/getpass.html). + +_Note: Additional secret store options may be supported in the future. 
[More info here.](https://github.com/airbytehq/airbyte-lib-private-beta/discussions/5)_ + +### Retrieving Secrets + +```python +from airbyte_lib import get_connector, get_secret, SecretSource + +source = get_connector("source-github") +source.set_config( + { + "credentials": { + "personal_access_token": get_secret("GITHUB_PERSONAL_ACCESS_TOKEN"), + } + } +) +``` + +The `get_secret()` function accepts an optional `source` argument of enum type `SecretSource`. If omitted or set to `SecretSource.ANY`, AirbyteLib will search all available secret sources. If `source` is set to a specific source, then only that source will be checked. If a list of `SecretSource` entries is passed, then the sources will be checked using the provided ordering. + +By default, AirbyteLib will prompt the user for any requested secrets that are not provided via other secret managers. You can disable this prompt by passing `prompt=False` to `get_secret()`. ### Versioning @@ -24,13 +51,13 @@ Versioning follows [Semantic Versioning](https://semver.org/). For new features, Regular documentation lives in the `/docs` folder. Based on the docstrings of public methods, we generate API documentation using [pdoc](https://pdoc.dev). To generate the documentation, run `poetry run generate-docs`. The documentation will be generated in the `docs/generated` folder. This needs to be done manually when changing the public interface of the library. -A unit test validates the documentation is up to date. +A unit test validates the documentation is up to date. ## Validating source connectors To validate a source connector for compliance, the `airbyte-lib-validate-source` script can be used. It can be used like this: -``` +```bash airbyte-lib-validate-source --connector-dir . 
--sample-config secrets/config.json ``` diff --git a/airbyte-lib/airbyte_lib/__init__.py b/airbyte-lib/airbyte_lib/__init__.py index 86ffb5e0fa40..63afc6ce99c6 100644 --- a/airbyte-lib/airbyte_lib/__init__.py +++ b/airbyte-lib/airbyte_lib/__init__.py @@ -6,6 +6,7 @@ from airbyte_lib.caches import DuckDBCache, DuckDBCacheConfig from airbyte_lib.datasets import CachedDataset from airbyte_lib.results import ReadResult +from airbyte_lib.secrets import SecretSource, get_secret from airbyte_lib.source import Source @@ -15,7 +16,9 @@ "DuckDBCacheConfig", "get_connector", "get_default_cache", + "get_secret", "new_local_cache", "ReadResult", + "SecretSource", "Source", ] diff --git a/airbyte-lib/airbyte_lib/exceptions.py b/airbyte-lib/airbyte_lib/exceptions.py index ac808ff50831..bcd5fb966fc0 100644 --- a/airbyte-lib/airbyte_lib/exceptions.py +++ b/airbyte-lib/airbyte_lib/exceptions.py @@ -250,3 +250,16 @@ class AirbyteStreamNotFoundError(AirbyteConnectorError): stream_name: str | None = None available_streams: list[str] | None = None + + +@dataclass +class AirbyteLibSecretNotFoundError(AirbyteError): + """Secret not found.""" + + guidance = "Please ensure that the secret is set." + help_url = ( + "https://docs.airbyte.com/using-airbyte/airbyte-lib/getting-started#secrets-management" + ) + + secret_name: str | None = None + sources: list[str] | None = None diff --git a/airbyte-lib/airbyte_lib/secrets.py b/airbyte-lib/airbyte_lib/secrets.py new file mode 100644 index 000000000000..90774e9ee9c2 --- /dev/null +++ b/airbyte-lib/airbyte_lib/secrets.py @@ -0,0 +1,86 @@ +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+"""Secrets management for AirbyteLib.""" +from __future__ import annotations + +import os +from enum import Enum, auto +from getpass import getpass + +from airbyte_lib import exceptions as exc + + +class SecretSource(Enum): + ENV = auto() + GOOGLE_COLAB = auto() + ANY = auto() + + PROMPT = auto() + + +ALL_SOURCES = [ + SecretSource.ENV, + SecretSource.GOOGLE_COLAB, +] + +try: + from google.colab import userdata as colab_userdata +except ImportError: + colab_userdata = None + + +def get_secret( + secret_name: str, + source: SecretSource | list[SecretSource] = SecretSource.ANY, + *, + prompt: bool = True, +) -> str: + """Get a secret from the environment. + + The optional `source` argument of enum type `SecretSource` or list of `SecretSource` options. + If left blank, the `source` arg will be `SecretSource.ANY`. If `source` is set to a specific + source, then only that source will be checked. If a list of `SecretSource` entries is passed, + then the sources will be checked using the provided ordering. + + If `prompt` to `True` or if SecretSource.PROMPT is declared in the `source` arg, then the + user will be prompted to enter the secret if it is not found in any of the other sources. 
+ """ + sources = [source] if not isinstance(source, list) else source + if SecretSource.ANY in sources: + sources += [s for s in ALL_SOURCES if s not in sources] + sources.remove(SecretSource.ANY) + + if prompt or SecretSource.PROMPT in sources: + if SecretSource.PROMPT in sources: + sources.remove(SecretSource.PROMPT) + + sources.append(SecretSource.PROMPT) # Always check prompt last + + for s in sources: + val = _get_secret_from_source(secret_name, s) + if val: + return val + + raise exc.AirbyteLibSecretNotFoundError( + secret_name=secret_name, + sources=[str(s) for s in sources], + ) + + +def _get_secret_from_source( + secret_name: str, + source: SecretSource, +) -> str | None: + if source in [SecretSource.ENV, SecretSource.ANY] and secret_name in os.environ: + return os.environ[secret_name] + + if ( + source in [SecretSource.GOOGLE_COLAB, SecretSource.ANY] + and colab_userdata is not None + and colab_userdata.get(secret_name, None) + ): + return colab_userdata.get(secret_name) + + if source == SecretSource.PROMPT: + return getpass(f"Enter the value for secret '{secret_name}': ") + + return None diff --git a/airbyte-lib/docs/generated/airbyte_lib.html b/airbyte-lib/docs/generated/airbyte_lib.html index 73e8779d3fd7..00b5d41acf68 100644 --- a/airbyte-lib/docs/generated/airbyte_lib.html +++ b/airbyte-lib/docs/generated/airbyte_lib.html @@ -295,6 +295,29 @@
Inherited Members
+ +
+
+ + def + get_secret( secret_name: str, source: SecretSource | list[SecretSource] = <SecretSource.ANY: 3>, *, prompt: bool = True) -> str: + + +
+ + +

Get a secret from the environment.

+ +

The optional source argument of enum type SecretSource or list of SecretSource options. +If left blank, the source arg will be SecretSource.ANY. If source is set to a specific +source, then only that source will be checked. If a list of SecretSource entries is passed, +then the sources will be checked using the provided ordering.

+ +

If prompt to True or if SecretSource.PROMPT is declared in the source arg, then the +user will be prompted to enter the secret if it is not found in any of the other sources.

+
+ +
@@ -404,6 +427,79 @@
Inherited Members
items
values
+
+ + +
+
+
+ + class + SecretSource(enum.Enum): + + +
+ + +

An enumeration.

+
+ + +
+
+ ENV = +<SecretSource.ENV: 1> + + +
+ + + + +
+
+
+ GOOGLE_COLAB = +<SecretSource.GOOGLE_COLAB: 2> + + +
+ + + + +
+
+
+ ANY = +<SecretSource.ANY: 3> + + +
+ + + + +
+
+
+ PROMPT = +<SecretSource.PROMPT: 4> + + +
+ + + + +
+
+
Inherited Members
+
+
enum.Enum
+
name
+
value
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
"""A simple test of AirbyteLib, using the GitHub source connector.

Usage (from airbyte-lib root directory):
> poetry run python ./examples/run_github.py

The GitHub personal access token is requested via `ab.get_secret()` under the
name `GITHUB_PERSONAL_ACCESS_TOKEN`.

NOTE(review): by analogy with the Faker example, you may need to delete the
.venv-source-github folder if your installation gets interrupted or corrupted -
confirm the folder name.
"""
from __future__ import annotations

import airbyte_lib as ab


GITHUB_TOKEN = ab.get_secret("GITHUB_PERSONAL_ACCESS_TOKEN")


source = ab.get_connector("source-github")
source.set_config(
    {"repositories": ["airbytehq/airbyte"], "credentials": {"personal_access_token": GITHUB_TOKEN}}
)
source.check()
# Select streams that the GitHub connector provides; "products", "users" and
# "purchases" belong to the Faker connector.
source.set_streams(["issues", "pull_requests", "commits"])

result = source.read()

for name, records in result.streams.items():
    print(f"Stream {name}: {len(records)} records")