Skip to content

Commit

Permalink
Move cloud auth to sdk (#779)
Browse files Browse the repository at this point in the history
The arguments that enabled authenticating to different clouds through
volume mounting credentials were restricted to the CLI and were not
explicitly available in the SDK.

This PR moves the argument for authenticating to one of the clouds
from the DockerRunner in the CLI to the SDK.

e.g.

authenticating to GCP

```python
runner.run(auth_gcp=True)
```
  • Loading branch information
PhilippeMoussalli authored Jan 15, 2024
1 parent 255548d commit 0c92737
Show file tree
Hide file tree
Showing 9 changed files with 172 additions and 249 deletions.
37 changes: 9 additions & 28 deletions docs/runners/local.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,19 +31,19 @@ about this in the [installation](../guides/installation.md) guide.
=== "GCP"

```bash
fondant run local <pipeline_ref> --auth-gcp
fondant run local <pipeline_ref> --auth-provider gcp
```

=== "AWS"

```bash
fondant run local <pipeline_ref> --auth-aws
fondant run local <pipeline_ref> --auth-provider aws
```

=== "Azure"

```bash
fondant run local <pipeline_ref> --auth-azure
fondant run local <pipeline_ref> --auth-provider azure
```

You can also use the `--extra-volumes` argument to mount extra credentials or additional files.
Expand All @@ -53,62 +53,43 @@ about this in the [installation](../guides/installation.md) guide.
=== "Python"

```python
from fondant.pipeline.compiler import DockerCompiler
from fondant.pipeline.runner import DockerRunner

EXTRA_VOLUMES = <str_or_list_of_optional_extra_volumes_to_mount>
compiler = DockerCompiler(extra_volumes=EXTRA_VOLUMES)
compiler.compile(pipeline=<pipeline_object>)

runner = DockerRunner()
runner.run(input_spec=<path_to_compiled_spec>)
runner.run(extra_volumes=<str_or_list_of_optional_extra_volumes_to_mount>)
```

If you want to use remote paths (GCS, S3, etc.) you can use pass the default local cloud credentials to the pipeline.
If you want to use remote paths (GCS, S3, etc.), you can use the authentication argument
in your pipeline.

=== "GCP"

```python
from fondant.pipeline.compiler import DockerCompiler
from fondant.pipeline.runner import DockerRunner
from fondant.core.schema import CloudCredentialsMount
gcp_mount_dir = CloudCredentialsMount.GCP.value
compiler = DockerCompiler(extra_volumes=gcp_mount_dir)
compiler.compile(pipeline=<pipeline_object>)

runner = DockerRunner()
runner.run(input_spec=<path_to_compiled_spec>)
runner.run(auth_provider=CloudCredentialsMount.GCP)
```

=== "AWS"

```python
from fondant.pipeline.compiler import DockerCompiler
from fondant.pipeline.runner import DockerRunner
from fondant.core.schema import CloudCredentialsMount
aws_mount_dir = CloudCredentialsMount.AWS.value
compiler = DockerCompiler(extra_volumes=aws_mount_dir)
compiler.compile(pipeline=<pipeline_object>)

runner = DockerRunner()
runner.run(input_spec=<path_to_compiled_spec>)
runner.run(auth_provider=CloudCredentialsMount.AWS)
```

=== "Azure"

```python
from fondant.pipeline.compiler import DockerCompiler
from fondant.pipeline.runner import DockerRunner
from fondant.core.schema import CloudCredentialsMount
azure_mount_dir = CloudCredentialsMount.AZURE.value
compiler = DockerCompiler(extra_volumes=azure_mount_dir)
compiler.compile(pipeline=<pipeline_object>)

runner = DockerRunner()
runner.run(input_spec=<path_to_compiled_spec>)
runner.run(auth_provider=CloudCredentialsMount.AZURE)
```

This will mount your default local cloud credentials to the pipeline. Make sure you are authenticated locally before running the pipeline and
Expand Down
109 changes: 26 additions & 83 deletions src/fondant/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,15 +37,12 @@
logger = logging.getLogger(__name__)


def get_cloud_credentials(args) -> t.Optional[str]:
if args.auth_gcp:
return CloudCredentialsMount.GCP.value
if args.auth_aws:
return CloudCredentialsMount.AWS.value
if args.auth_azure:
return CloudCredentialsMount.AZURE.value

return None
def cloud_credentials_arg(value: str):
    """Parse an ``--auth-provider`` CLI value into a ``CloudCredentialsMount`` member.

    The lookup is case-insensitive, so ``gcp``, ``GCP`` and ``Gcp`` all resolve to
    ``CloudCredentialsMount.GCP``.

    Args:
        value: the provider name passed on the command line.

    Returns:
        The matching ``CloudCredentialsMount`` enum member.

    Raises:
        argparse.ArgumentTypeError: if ``value`` does not name a known provider.
    """
    try:
        return CloudCredentialsMount[value.upper()]
    except KeyError:
        msg = f"Invalid CloudCredentialsMount value: {value}"
        # `from None` suppresses the internal KeyError so argparse surfaces a
        # clean usage error instead of a chained traceback.
        raise argparse.ArgumentTypeError(msg) from None


def entrypoint():
Expand Down Expand Up @@ -117,8 +114,6 @@ def register_explore(parent_parser):
start_parser = explore_subparser.add_parser(name="start", help="Start explorer app")
stop_parser = explore_subparser.add_parser(name="stop", help="Stop explorer app")

auth_group = start_parser.add_mutually_exclusive_group()

start_parser.add_argument(
"--base_path",
"-b",
Expand Down Expand Up @@ -154,28 +149,14 @@ def register_explore(parent_parser):
help="The path to the Docker Compose specification.",
)

auth_group.add_argument(
"--auth-gcp",
action="store_true",
help=f"Flag to authenticate with GCP. Uses the following mount command"
f" `{CloudCredentialsMount.GCP.value}`",
)

auth_group.add_argument(
"--auth-azure",
action="store_true",
help="Flag to authenticate with Azure. Uses the following mount command"
f" `{CloudCredentialsMount.AZURE.value}`",
)

auth_group.add_argument(
"--auth-aws",
action="store_true",
help="Flag to authenticate with AWS. Uses the following mount command"
f" `{CloudCredentialsMount.AWS.value}`",
start_parser.add_argument(
"--auth-provider",
type=cloud_credentials_arg,
choices=list(CloudCredentialsMount),
help="Flag to authenticate with a cloud provider",
)

auth_group.add_argument(
start_parser.add_argument(
"--extra-volumes",
help="""Extra volumes to mount in containers. You can use the --extra-volumes flag to specify extra volumes to mount in the containers this can be used:
- to mount data directories to be used by the pipeline (note that if your pipeline's base_path is local it will already be mounted for you).
Expand Down Expand Up @@ -204,11 +185,6 @@ def start_explore(args):

extra_volumes = []

cloud_cred = get_cloud_credentials(args)

if cloud_cred:
extra_volumes.append(cloud_cred)

if args.extra_volumes:
extra_volumes.extend(args.extra_volumes)

Expand All @@ -218,6 +194,7 @@ def start_explore(args):
tag=args.tag,
port=args.port,
extra_volumes=extra_volumes,
auth_provider=args.auth_provider,
)


Expand Down Expand Up @@ -329,7 +306,7 @@ def register_compile(parent_parser):
compiler_subparser = parser.add_subparsers()

local_parser = compiler_subparser.add_parser(name="local", help="Local compiler")
auth_group_local_parser = local_parser.add_mutually_exclusive_group()
local_parser.add_mutually_exclusive_group()

kubeflow_parser = compiler_subparser.add_parser(
name="kubeflow",
Expand Down Expand Up @@ -371,25 +348,11 @@ def register_compile(parent_parser):
default=[],
)

auth_group_local_parser.add_argument(
"--auth-gcp",
action="store_true",
help=f"Flag to authenticate with GCP. Uses the following mount command"
f" `{CloudCredentialsMount.GCP.value}`",
)

auth_group_local_parser.add_argument(
"--auth-azure",
action="store_true",
help="Flag to authenticate with Azure. Uses the following mount command"
f" `{CloudCredentialsMount.AZURE.value}`",
)

auth_group_local_parser.add_argument(
"--auth-aws",
action="store_true",
help="Flag to authenticate with AWS. Uses the following mount command"
f" `{CloudCredentialsMount.AWS.value}`",
local_parser.add_argument(
"--auth-provider",
type=cloud_credentials_arg,
choices=list(CloudCredentialsMount),
help="Flag to authenticate with a cloud provider",
)

# Kubeflow parser
Expand Down Expand Up @@ -449,21 +412,18 @@ def compile_local(args):
from fondant.pipeline.compiler import DockerCompiler

extra_volumes = []
cloud_cred = get_cloud_credentials(args)

if args.extra_volumes:
extra_volumes.extend(args.extra_volumes)

if cloud_cred:
extra_volumes.append(cloud_cred)

pipeline = pipeline_from_string(args.ref)
compiler = DockerCompiler()
compiler.compile(
pipeline=pipeline,
extra_volumes=extra_volumes,
output_path=args.output_path,
build_args=args.build_arg,
auth_provider=args.auth_provider,
)


Expand Down Expand Up @@ -518,7 +478,6 @@ def register_run(parent_parser):
runner_subparser = parser.add_subparsers()

local_parser = runner_subparser.add_parser(name="local", help="Local runner")
auth_group_local_parser = local_parser.add_mutually_exclusive_group()

kubeflow_parser = runner_subparser.add_parser(
name="kubeflow",
Expand Down Expand Up @@ -573,25 +532,12 @@ def register_run(parent_parser):
help="KubeFlow pipeline host url",
required=True,
)
auth_group_local_parser.add_argument(
"--auth-gcp",
action="store_true",
help=f"Flag to authenticate with GCP. Uses the following mount command"
f" `{CloudCredentialsMount.GCP.value}`",
)

auth_group_local_parser.add_argument(
"--auth-azure",
action="store_true",
help="Flag to authenticate with Azure. Uses the following mount command"
f" `{CloudCredentialsMount.AZURE.value}`",
)

auth_group_local_parser.add_argument(
"--auth-aws",
action="store_true",
help="Flag to authenticate with AWS. Uses the following mount command"
f" `{CloudCredentialsMount.AWS.value}`",
local_parser.add_argument(
"--auth-provider",
type=cloud_credentials_arg,
choices=list(CloudCredentialsMount),
help="Flag to authenticate with a cloud provider",
)

# Vertex runner parser
Expand Down Expand Up @@ -661,14 +607,10 @@ def run_local(args):
from fondant.pipeline.runner import DockerRunner

extra_volumes = []
cloud_cred = get_cloud_credentials(args)

if args.extra_volumes:
extra_volumes.extend(args.extra_volumes)

if cloud_cred:
extra_volumes.append(cloud_cred)

try:
ref = pipeline_from_string(args.ref)
except ModuleNotFoundError:
Expand All @@ -679,6 +621,7 @@ def run_local(args):
input=ref,
extra_volumes=extra_volumes,
build_args=args.build_arg,
auth_provider=args.auth_provider,
)


Expand Down
29 changes: 21 additions & 8 deletions src/fondant/core/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import re
import typing as t
from dataclasses import dataclass
from enum import Enum
from enum import Enum, auto

import pyarrow as pa

Expand All @@ -30,13 +30,26 @@ class DockerVolume:


class CloudCredentialsMount(Enum):
    """Supported cloud providers whose default local credentials can be
    volume-mounted into pipeline containers.

    Members carry no meaningful value (``auto()``); use :meth:`get_path` to
    obtain the ``host_path:container_path`` Docker volume mount string.
    """

    AWS = auto()
    GCP = auto()
    AZURE = auto()

    def get_path(self) -> t.Optional[str]:
        """Return the Docker volume mount string for this provider's local
        default credentials, as ``<host_path>:<container_path>``.

        Returns None if no mount is defined for the member (defensive; every
        current member has one).
        """
        home_dir = os.path.expanduser("~")
        # Map each provider to where its CLI stores default credentials on the
        # host, mounted at the location the in-container SDKs expect.
        mounts = {
            CloudCredentialsMount.AWS: f"{home_dir}/credentials:/root/.aws/credentials",
            CloudCredentialsMount.GCP: (
                f"{home_dir}/.config/gcloud/application_default_credentials.json:"
                f"/root/.config/gcloud/application_default_credentials.json"
            ),
            CloudCredentialsMount.AZURE: f"{home_dir}/.azure:/root/.azure",
        }
        return mounts.get(self)


"""
Expand Down
9 changes: 8 additions & 1 deletion src/fondant/explore.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import yaml
from fsspec.implementations.local import LocalFileSystem

from fondant.core.schema import DockerVolume
from fondant.core.schema import CloudCredentialsMount, DockerVolume

CONTAINER = "fndnt/data_explorer"
PORT = 8501
Expand All @@ -29,6 +29,7 @@ def _generate_explorer_spec(
container: str = CONTAINER,
tag: t.Optional[str] = None,
extra_volumes: t.Union[t.Optional[list], t.Optional[str]] = None,
auth_provider: t.Optional[CloudCredentialsMount] = None,
) -> t.Dict[str, t.Any]:
"""Generate a Docker Compose specification for the Explorer App."""
if tag is None:
Expand All @@ -40,6 +41,9 @@ def _generate_explorer_spec(
if isinstance(extra_volumes, str):
extra_volumes = [extra_volumes]

if auth_provider:
extra_volumes.append(auth_provider.get_path())

# Mount extra volumes to the container
volumes: t.List[t.Union[str, dict]] = []

Expand Down Expand Up @@ -105,6 +109,7 @@ def run_explorer_app( # type: ignore # noqa: PLR0913
output_path: str = OUTPUT_PATH,
tag: t.Optional[str] = None,
extra_volumes: t.Union[t.Optional[list], t.Optional[str]] = None,
auth_provider: t.Optional[CloudCredentialsMount] = None,
): # type: ignore
"""
Run an Explorer App in a Docker container.
Expand All @@ -121,6 +126,7 @@ def run_explorer_app( # type: ignore # noqa: PLR0913
- to mount data directories to be used by the pipeline (note that if your pipeline's
base_path is local it will already be mounted for you).
- to mount cloud credentials
auth_provider: The cloud provider to use for authentication. Default is None.
"""
os.makedirs(".fondant", exist_ok=True)

Expand All @@ -130,6 +136,7 @@ def run_explorer_app( # type: ignore # noqa: PLR0913
container=container,
tag=tag,
extra_volumes=extra_volumes,
auth_provider=auth_provider,
)

with open(output_path, "w") as outfile:
Expand Down
Loading

0 comments on commit 0c92737

Please sign in to comment.