Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add proxy agent runner for kubeflow pipeline #988

Merged
merged 5 commits into from
Mar 22, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions proxy/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Pin to a specific version of the inverting proxy agent image.
FROM gcr.io/inverting-proxy/agent@sha256:d0a06a247bb443f9528356a1341cadfa4c4479a034097ef9ed8cf200c6383ec0

# Tools used by the registration script: curl and jq for the HTTP/JSON
# plumbing, pip + requests for get_proxy_url.py. The apt package lists are
# removed in the same layer so they do not end up in the image.
RUN apt-get update \
    && apt-get install -y curl jq python-pip \
    && rm -rf /var/lib/apt/lists/* \
    && pip install requests

# Install gcloud SDK. Download, unpack and clean up in a single layer so the
# tarball is not kept in the image; -f makes curl fail on HTTP errors instead
# of saving an error page.
RUN curl -fsSL https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.tar.gz -o /tmp/google-cloud-sdk.tar.gz \
    && mkdir -p /usr/local/gcloud \
    && tar -C /usr/local/gcloud -xf /tmp/google-cloud-sdk.tar.gz \
    && rm /tmp/google-cloud-sdk.tar.gz \
    && /usr/local/gcloud/google-cloud-sdk/install.sh
ENV PATH=$PATH:/usr/local/gcloud/google-cloud-sdk/bin

# Install kubectl (the current stable release at image build time).
RUN curl -fsSLO https://storage.googleapis.com/kubernetes-release/release/$(curl -fsSL https://storage.googleapis.com/kubernetes-release/release/stable.txt)/bin/linux/amd64/kubectl \
    && chmod +x ./kubectl \
    && mv ./kubectl /usr/local/bin/

# COPY is sufficient here: the build context is plain local files.
COPY ./ /opt/proxy

CMD ["/bin/sh", "-c", "/opt/proxy/attempt-register-vm-on-proxy.sh"]
5 changes: 5 additions & 0 deletions proxy/OWNERS
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
approvers:
- IronPan
reviewers:
- IronPan
- ojarjur
80 changes: 80 additions & 0 deletions proxy/attempt-register-vm-on-proxy.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
#!/bin/bash
#
# Copyright 2019 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

set -ex

DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" > /dev/null && pwd)"

# Starts the inverting-proxy forwarding agent in the foreground.
#
# Reads PROXY_URL and BACKEND_ID (set by the caller before invoking this
# function) plus DEBUG, PROXY_TIMEOUT, AMBASSADOR_SERVICE_HOST,
# AMBASSADOR_SERVICE_PORT and the HEALTH_CHECK_* settings from the environment.
function run-proxy-agent {
  # Start the proxy process
  # https://github.com/google/inverting-proxy/blob/master/agent/Dockerfile
  # Connect proxy agent to ambassador so anything registered to ambassador can be transparently accessed.
  # Every expansion is quoted so that a value containing whitespace or glob
  # characters is passed to the agent as a single argument.
  /opt/bin/proxy-forwarding-agent \
    --debug="${DEBUG}" \
    --proxy="${PROXY_URL}" \
    --proxy-timeout="${PROXY_TIMEOUT}" \
    --backend="${BACKEND_ID}" \
    --host="${AMBASSADOR_SERVICE_HOST}:${AMBASSADOR_SERVICE_PORT}" \
    --shim-websockets=true \
    --shim-path=websocket-shim \
    --health-check-path="${HEALTH_CHECK_PATH}" \
    --health-check-interval-seconds="${HEALTH_CHECK_INTERVAL_SECONDS}" \
    --health-check-unhealthy-threshold="${HEALTH_CHECK_UNHEALTHY_THRESHOLD}"
}

# Check whether the cluster already has a proxy agent registered by looking for its ConfigMap.
# The ConfigMap is fetched once and both values are parsed from the same snapshot.
if CONFIG_JSON=$(kubectl get configmap inverse-proxy-config -o json); then
  # If the ConfigMap already exists, reuse the existing endpoint (a.k.a. BACKEND_ID) and the same ProxyUrl.
  PROXY_URL=$(echo "${CONFIG_JSON}" | jq -r ".data.ProxyUrl")
  BACKEND_ID=$(echo "${CONFIG_JSON}" | jq -r ".data.BackendId")
  run-proxy-agent
  exit 0
fi

# Activate service account for gcloud SDK first
if [[ -n "${GOOGLE_APPLICATION_CREDENTIALS}" ]]; then
  gcloud auth activate-service-account --key-file="${GOOGLE_APPLICATION_CREDENTIALS}"
fi

# Keep only the last path component of the metadata value: a "/" is prepended
# and the longest prefix matching "/*/" is then stripped.
INSTANCE_ZONE="/"$(curl http://metadata.google.internal/computeMetadata/v1/instance/zone -H "Metadata-Flavor: Google")
INSTANCE_ZONE="${INSTANCE_ZONE##/*/}"

# Get latest Proxy server URL
curl -O https://storage.googleapis.com/dl-platform-public-configs/proxy-agent-config.json
# The helper exits non-zero when it finds no URL; fall back to an empty value
# so the explicit check below reports the problem instead of the shell
# aborting on the failed assignment.
PROXY_URL=$(python "${DIR}/get_proxy_url.py" --config-file-path "proxy-agent-config.json" --location "${INSTANCE_ZONE}" --version "latest") || PROXY_URL=""
if [[ -z "${PROXY_URL}" ]]; then
  echo "Proxy URL for the zone ${INSTANCE_ZONE} not found, exiting."
  exit 1
fi
echo "Proxy URL from the config: ${PROXY_URL}"

# Register the proxy agent.
# Command tracing is switched off while credentials are handled so that the
# identity token and the access token are not written to the logs.
set +x
VM_ID=$(curl -H 'Metadata-Flavor: Google' "http://metadata.google.internal/computeMetadata/v1/instance/service-accounts/default/identity?format=full&audience=${PROXY_URL}/request-endpoint" 2>/dev/null)
ACCESS_TOKEN=$(gcloud auth print-access-token)
RESULT_JSON=$(curl -H "Authorization: Bearer ${ACCESS_TOKEN}" -H "X-Inverting-Proxy-VM-ID: ${VM_ID}" -d "" "${PROXY_URL}/request-endpoint" 2>/dev/null)
unset ACCESS_TOKEN
set -x
echo "Response from the registration server: ${RESULT_JSON}"

# PROXY_HOSTNAME is used instead of HOSTNAME so the variable that bash itself
# maintains is not overwritten.
PROXY_HOSTNAME=$(echo "${RESULT_JSON}" | jq -r ".hostname")
BACKEND_ID=$(echo "${RESULT_JSON}" | jq -r ".backendID")
echo "Hostname: ${PROXY_HOSTNAME}"
echo "Backend id: ${BACKEND_ID}"

# jq prints the literal string "null" for a missing field. Refuse to persist
# such a registration: the ConfigMap would otherwise be reused on every start.
if [[ -z "${BACKEND_ID}" || "${BACKEND_ID}" == "null" || -z "${PROXY_HOSTNAME}" || "${PROXY_HOSTNAME}" == "null" ]]; then
  echo "Registration response did not contain hostname and backendID, exiting."
  exit 1
fi

# Store the registration information in a ConfigMap
kubectl create configmap inverse-proxy-config \
  --from-literal=ProxyUrl="${PROXY_URL}" \
  --from-literal=BackendId="${BACKEND_ID}" \
  --from-literal=Hostname="${PROXY_HOSTNAME}"

run-proxy-agent
106 changes: 106 additions & 0 deletions proxy/get_proxy_url.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
#!/usr/bin/env python3
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""CLI tool that returns URL of the proxy for particular zone and version."""
import argparse
import functools
import json
import logging
import re
import requests

def urls_for_zone(zone, location_to_urls_map):
    """Returns the set of potential proxy URLs for a given zone.

    Candidates are gathered from three kinds of map entries: the zone's exact
    region (e.g. "us-west1"), every region that shares the same name once the
    trailing digits are dropped (e.g. "us-west2" for "us-west"), and the
    leading location component (e.g. "us").

    Args:
      zone: GCP zone, e.g. "us-west1-b".
      location_to_urls_map: Maps region/country/continent to list of URLs, e.g.:
        {
          "us-west1" : [ us-west1-url ],
          "us-east1" : [ us-east1-url ],
          "us" : [ us-west1-url ],
          ...
        }

    Returns:
      A set of candidate URLs. Duplicates are collapsed and no ordering is
      preserved; the set is empty when nothing in the map matches.

    Raises:
      ValueError: If `zone` does not look like "<name>-<name><digits>-<letter>".
    """
    # Raw strings keep "\d" a regex escape rather than an (invalid) string
    # escape sequence.
    zone_match = re.match(r"((([a-z]+)-[a-z]+)\d+)-[a-z]", zone)
    if not zone_match:
        raise ValueError("Incorrect zone specified: {}".format(zone))

    # e.g. zone = us-west1-b -> region = us-west1, approx_region = us-west,
    # country = us
    region, approx_region, country = zone_match.groups()

    urls = set()
    urls.update(location_to_urls_map.get(region, ()))

    region_regex = re.compile(r"([a-z]+-[a-z]+)\d+")
    for location, location_urls in location_to_urls_map.items():
        region_match = region_regex.match(location)
        if region_match and region_match.group(1) == approx_region:
            urls.update(location_urls)

    urls.update(location_to_urls_map.get(country, ()))

    return urls


def main():
    """Prints the first working proxy URL for the requested zone and exits.

    Command-line flags:
      --config-file-path: Path of the JSON proxy agent config file.
      --location: GCP zone the agent runs in, e.g. "us-west1-b".
      --version: Agent version key to look up; "latest" is used when the key
        is not present in the config.

    Each candidate URL is probed with an HTTP HEAD request. The first URL that
    answers with an expected status code is printed to stdout and the process
    exits with status 0.

    Raises:
      ValueError: If the config has no usable version entry, no URL matches
        the zone, or none of the candidate URLs answers as expected.
    """
    import sys  # Local import: only needed for the explicit exit below.

    try:
        # Python 2: decode byte-string arguments into unicode text.
        text_type = functools.partial(unicode, encoding="utf8")  # noqa: F821
    except NameError:
        # Python 3: command-line arguments are already text.
        text_type = str

    parser = argparse.ArgumentParser(
        description="Get proxy URL")
    parser.add_argument("--config-file-path", required=True, type=text_type)
    parser.add_argument("--location", required=True, type=text_type)
    parser.add_argument("--version", required=True, type=text_type)

    args = parser.parse_args()
    with open(args.config_file_path, "r") as config_file:
        data = json.load(config_file)

    agent_containers_config = data["agent-docker-containers"]
    version = args.version
    if version not in agent_containers_config:
        version = "latest"
    if version not in agent_containers_config:
        raise ValueError("Version latest not found in the config file.")
    container_config = agent_containers_config[version]
    regional_urls = container_config["proxy-urls"]

    location = args.location
    urls = urls_for_zone(location, regional_urls)
    if not urls:
        raise ValueError("No valid URLs found for zone: {}".format(location))

    # 307 - Temporary Redirect, Proxy server sends this if VM has access rights.
    expected_codes = frozenset([307])
    # Bound every probe so that one unresponsive server cannot hang the script.
    request_timeout_seconds = 10

    for url in urls:
        try:
            status_code = requests.head(
                url, timeout=request_timeout_seconds).status_code
        except (requests.ConnectionError, requests.Timeout) as err:
            # No status code is available for this URL; try the next one.
            logging.debug("Could not reach %s: %s", url, err)
            continue
        if status_code in expected_codes:
            logging.debug("Status code from the url %s", status_code)
            print(url)
            sys.exit(0)
        logging.debug("Incorrect status_code from the server: %s. Expected: %s",
                      status_code, expected_codes)
    raise ValueError("No working URL found")
hongye-sun marked this conversation as resolved.
Show resolved Hide resolved

# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
45 changes: 45 additions & 0 deletions proxy/get_proxy_url_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#!/usr/bin/env python3
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import json
import unittest

from get_proxy_url import urls_for_zone

url_map_json = """
{
"us": ["https://datalab-us-west1.cloud.google.com"],
"us-west1": ["https://datalab-us-west1.cloud.google.com"],
"us-east1": ["https://datalab-us-east1.cloud.google.com"]
}
"""

class TestUrlsForZone(unittest.TestCase):
    """Unit tests for urls_for_zone, run against the url_map_json fixture."""

    def test_get_urls(self):
        # The zone's own region and the "us" entry both contribute a URL.
        expected_urls = {
            "https://datalab-us-east1.cloud.google.com",
            "https://datalab-us-west1.cloud.google.com",
        }
        location_map = json.loads(url_map_json)
        self.assertEqual(expected_urls, urls_for_zone("us-east1-a", location_map))

    def test_get_urls_no_match(self):
        # A well-formed zone with no entry in the map yields an empty set.
        location_map = json.loads(url_map_json)
        self.assertEqual(set(), urls_for_zone("euro-west1-a", location_map))

    def test_get_urls_incorrect_format(self):
        # A zone string without the numeric region suffix is rejected.
        location_map = json.loads(url_map_json)
        with self.assertRaises(ValueError):
            urls_for_zone("weird-format-a", location_map)

# Allow running the tests directly with `python get_proxy_url_test.py`.
if __name__ == "__main__":
    unittest.main()