Skip to content

Commit

Permalink
Support GCS(Google Cloud Storage) in YataiService (bentoml#1017)
Browse files Browse the repository at this point in the history
* Integrated Google Cloud Storage

Co-authored-by: korusuke <karan.sheth@somaiya.edu>

* e2e tests

* Addressed PR review comments

* formatting

* update setup file

* remove aws-sam-cli from test requirements

* restore s3_presigned_url and add gcs_presigned_url

Co-authored-by: PrabhanshuAttri <contact@prabhanshu.com>
Co-authored-by: yubozhao <yubz86@gmail.com>
  • Loading branch information
3 people authored Sep 6, 2020
1 parent 011941f commit 89c0706
Show file tree
Hide file tree
Showing 17 changed files with 359 additions and 59 deletions.
2 changes: 2 additions & 0 deletions bentoml/cli/bento_management.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,8 @@ def retrieve(bento, target_dir):

# Prefer a presigned download URL when the repository provides one and fall
# back to the plain URI otherwise. The three cases are mutually exclusive, so
# they must form a single if/elif/else chain: with a separate second `if`, its
# `else` branch overwrote the S3 presigned URL with the plain URI whenever no
# GCS presigned URL was set.
if get_bento_result.bento.uri.s3_presigned_url:
    bento_service_bundle_path = get_bento_result.bento.uri.s3_presigned_url
elif get_bento_result.bento.uri.gcs_presigned_url:
    bento_service_bundle_path = get_bento_result.bento.uri.gcs_presigned_url
else:
    bento_service_bundle_path = get_bento_result.bento.uri.uri

Expand Down
7 changes: 5 additions & 2 deletions bentoml/cli/bento_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from bentoml import __version__
from bentoml.utils.lazy_loader import LazyLoader
from bentoml.utils.s3 import is_s3_url
from bentoml.utils.gcs import is_gcs_url
from bentoml.server.api_server import BentoAPIServer
from bentoml.exceptions import BentoMLException, CLIException
from bentoml.server import start_dev_server, start_prod_server
Expand Down Expand Up @@ -121,8 +122,8 @@ def resolve_bundle_path(bento, pip_installed_bundle_path):
), "pip installed BentoService commands should not have Bento argument"
return pip_installed_bundle_path

if os.path.isdir(bento) or is_s3_url(bento):
# saved_bundle already support loading local and s3 path
if os.path.isdir(bento) or is_s3_url(bento) or is_gcs_url(bento):
# saved_bundle already supports loading local, S3 and GCS paths
return bento

elif ":" in bento:
Expand All @@ -141,6 +142,8 @@ def resolve_bundle_path(bento, pip_installed_bundle_path):
if get_bento_result.bento.uri.s3_presigned_url:
# Use s3 presigned URL for downloading the repository if it is presented
return get_bento_result.bento.uri.s3_presigned_url
if get_bento_result.bento.uri.gcs_presigned_url:
return get_bento_result.bento.uri.gcs_presigned_url
else:
return get_bento_result.bento.uri.uri
else:
Expand Down
24 changes: 22 additions & 2 deletions bentoml/saved_bundle/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from pathlib import PureWindowsPath, PurePosixPath

from bentoml.utils.s3 import is_s3_url
from bentoml.utils.gcs import is_gcs_url
from bentoml.utils.usage_stats import track_load_finish, track_load_start
from bentoml.exceptions import BentoMLException
from bentoml.saved_bundle.config import SavedBundleConfig
Expand All @@ -40,7 +41,9 @@ def _is_http_url(bundle_path):


def _is_remote_path(bundle_path):
    """Return whether ``bundle_path`` points at a remote location.

    A path counts as remote when ``is_s3_url``, ``is_gcs_url`` or
    ``_is_http_url`` accepts it. The checks are evaluated in that order and
    short-circuit on the first match.
    """
    # Only one return statement: a leftover S3/HTTP-only return placed before
    # this one would make the GCS check unreachable.
    return (
        is_s3_url(bundle_path) or is_gcs_url(bundle_path) or _is_http_url(bundle_path)
    )


@contextmanager
Expand All @@ -56,10 +59,28 @@ def _resolve_remote_bundle_path(bundle_path):
fileobj = io.BytesIO()
s3.download_fileobj(bucket_name, object_name, fileobj)
fileobj.seek(0, 0)
elif is_gcs_url(bundle_path):
try:
from google.cloud import storage
except ImportError:
raise BentoMLException(
'"google-cloud-storage" package is required. You can install it with '
'pip: "pip install google-cloud-storage"'
)

gcs = storage.Client()
fileobj = io.BytesIO()
gcs.download_blob_to_file(bundle_path, fileobj)
fileobj.seek(0, 0)
elif _is_http_url(bundle_path):
import requests

response = requests.get(bundle_path)
if response.status_code != 200:
raise BentoMLException(
f"Error retrieving BentoService bundle. "
f"{response.status_code}: {response.text}"
)
fileobj = io.BytesIO()
fileobj.write(response.content)
fileobj.seek(0, 0)
Expand Down Expand Up @@ -225,7 +246,6 @@ def load(bundle_path):
if _is_remote_path(bundle_path):
with _resolve_remote_bundle_path(bundle_path) as local_bundle_path:
return load(local_bundle_path)

track_load_start()

svc_cls = load_bento_service_class(bundle_path)
Expand Down
30 changes: 30 additions & 0 deletions bentoml/utils/gcs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Copyright 2019 Atalaya Tech, Inc.

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

# http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
from urllib.parse import urlparse

logger = logging.getLogger(__name__)


def is_gcs_url(url):
    """
    Check if the url is a Google Cloud Storage (GCS) url.

    'gs://' is the standard scheme for Google Cloud Storage URIs.
    Source: https://cloud.google.com/storage/docs/gsutil

    Args:
        url: The value to inspect. Anything that is not a ``str`` is treated
            as "not a GCS url" instead of raising.

    Returns:
        bool: True if ``url`` is a string whose parsed scheme is ``gs``,
        False otherwise (including non-string and unparsable input).
    """
    if not isinstance(url, str):
        # urlparse() fails on arbitrary objects (it tries to decode them as
        # bytes) and yields a bytes scheme for bytes input, which can never
        # equal "gs"; answer False up front for all of these.
        return False
    try:
        return urlparse(url).scheme == "gs"
    except ValueError:
        # urlparse() rejects some malformed URLs, e.g. an unbalanced "[" in
        # the network location.
        return False
17 changes: 12 additions & 5 deletions bentoml/yatai/client/bento_repository_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,8 @@ def _upload_bento_service(self, bento_service, saved_bento_path):
)
# Return URI to saved bento in repository storage
return response.uri.uri
elif response.uri.type == BentoUri.S3:
elif response.uri.type == BentoUri.S3 or response.uri.type == BentoUri.GCS:
uri_type = 'S3' if response.uri.type == BentoUri.S3 else 'GCS'
self._update_bento_upload_progress(
bento_service_metadata, UploadStatus.UPLOADING, 0
)
Expand All @@ -124,28 +125,34 @@ def _upload_bento_service(self, bento_service, saved_bento_path):
tar.add(saved_bento_path, arcname=bento_service_metadata.name)
fileobj.seek(0, 0)

http_response = requests.put(response.uri.s3_presigned_url, data=fileobj)
if response.uri.type == BentoUri.S3:
http_response = requests.put(
response.uri.s3_presigned_url, data=fileobj
)
else:
http_response = requests.put(
response.uri.gcs_presigned_url, data=fileobj
)

if http_response.status_code != 200:
self._update_bento_upload_progress(
bento_service_metadata, UploadStatus.ERROR
)
raise BentoMLException(
    # Name the actual storage backend (S3 or GCS) and keep the space after
    # the period so the HTTP status code does not run into the sentence
    # (it previously rendered as "...to GCS.403: ...").
    f"Error saving BentoService bundle to {uri_type}. "
    f"{http_response.status_code}: {http_response.text}"
)

self._update_bento_upload_progress(bento_service_metadata)

# Pass the storage backend name as a lazy %-style argument. The format string
# is not an f-string, so a "{uri_type}" placeholder inside it would be logged
# literally instead of being replaced by "S3" or "GCS".
logger.info(
    "Successfully saved BentoService bundle '%s:%s' to %s: %s",
    bento_service_metadata.name,
    bento_service_metadata.version,
    uri_type,
    response.uri.uri,
)

return response.uri.uri

else:
raise BentoMLException(
f"Error saving Bento to target repository, URI type {response.uri.type}"
Expand Down
Loading

0 comments on commit 89c0706

Please sign in to comment.