Skip to content
This repository was archived by the owner on Sep 3, 2022. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
50 commits
Select commit Hold shift + click to select a range
1baa5f0
Adding evaluationanalysis API to generate evaluation stats from eval …
qimingj Nov 30, 2016
f220fd0
Feature slicing view visualization component. (#109)
qimingj Dec 8, 2016
9b27cbe
Merge branch 'master' into cloudml
qimingj Dec 8, 2016
fa99d76
Datalab Inception (image classification) solution. (#117)
qimingj Jan 3, 2017
06dfd6f
Datalab "ml" magics for running a solution package. Update Inception …
qimingj Jan 13, 2017
ad94352
Update feature-slice-view supporting file, which fixes some stability…
qimingj Jan 17, 2017
eb46fd9
Remove old feature-slicing pipeline implementation (is replaced by Bi…
qimingj Jan 19, 2017
a3639bd
Mergemaster/cloudml (#134)
qimingj Jan 19, 2017
0694052
Fix an issue that prediction right after preprocessing fails in incep…
qimingj Jan 20, 2017
fcf8fa7
add structure data preprocessing and training (#132)
brandondutra Jan 20, 2017
e309785
first full-feature version of structured data is done (#139)
brandondutra Jan 26, 2017
e92b790
Inception Package Improvements (#138)
qimingj Jan 30, 2017
b4e096e
Cloudmlm (#152)
qimingj Feb 1, 2017
ea1e170
Remove old DataSet implementation. Create new DataSets. (#151)
qimingj Feb 1, 2017
7137bcc
Inception package improvements (#155)
qimingj Feb 2, 2017
b3b9caf
Update feature slice view UI. Added Slices Overview. (#161)
qimingj Feb 3, 2017
d3e1f78
Move TensorBoard and TensorFlow Events UI rendering to Python functio…
qimingj Feb 6, 2017
1d0629d
new preprocessing and training for structured data (#160)
brandondutra Feb 7, 2017
3c3e89e
Move job, models, and feature_slice_view plotting to API. (#167)
qimingj Feb 7, 2017
b865a78
A util function to repackage and copy the package to staging location…
qimingj Feb 8, 2017
fdb695a
Move confusion matrix from %%ml to library. (#159)
qimingj Feb 8, 2017
f3eb838
Improve inception package so there is no need to have an GCS copy of …
qimingj Feb 9, 2017
6d8ffb6
Cloudmlsdp (#177)
brandondutra Feb 9, 2017
9bff1fb
Add CloudTrainingConfig namedtuple to wrap cloud training configurati…
qimingj Feb 9, 2017
2cf04df
prediction update (#183)
brandondutra Feb 13, 2017
9a268bc
Inception Package Improvements (#186)
qimingj Feb 13, 2017
fc3e958
Cloudmlmerge (#188)
qimingj Feb 13, 2017
70a2cfc
CsvDataSet no longer globs files in init. (#187)
brandondutra Feb 13, 2017
c26eb24
Move cloud trainer and predictor from their own classes to Job and Mo…
qimingj Feb 14, 2017
b0770c7
removed the feature type file (#199)
brandondutra Feb 15, 2017
8cfd621
Make inception to work with tf1.0. (#204)
qimingj Feb 15, 2017
67f3614
Workaround a TF summary issue. Force online prediction to use TF 1.0.…
qimingj Feb 16, 2017
2cd2edd
sd package. Local everything is working. (#211)
brandondutra Feb 16, 2017
9b7edaa
Remove tf dependency from structured data setup.py. (#212)
qimingj Feb 16, 2017
240e3f9
Cloudmld (#213)
brandondutra Feb 16, 2017
3c097f8
Add a resize option for inception package to avoid sending large data…
qimingj Feb 17, 2017
6e0466e
Cleanup mlalpha APIs that are not needed. (#218)
qimingj Feb 21, 2017
3be65c6
Inception package updates. (#219)
qimingj Feb 22, 2017
dc39ec3
Cloudml Branch Merge From Master (#222)
qimingj Feb 22, 2017
eece0c6
Remove CloudML SDK as dependency for PyDatalab. (#227)
qimingj Feb 23, 2017
91d89bd
Remove CloudML dependency from Inception. (#225)
qimingj Feb 23, 2017
8e67dbf
TensorFlow's save_model no longer creates export.meta, so disable the…
qimingj Feb 23, 2017
cf827ba
Cloudmlsm (#229)
brandondutra Feb 23, 2017
eb6ba05
small fixes to sd (#231)
brandondutra Feb 23, 2017
e3d011d
Rename from mlalpha to ml. (#232)
qimingj Feb 23, 2017
7c08bdd
fixed prediction (#235)
brandondutra Feb 24, 2017
4e9bf6e
small fixes (#236)
brandondutra Feb 24, 2017
6fa717b
Cloudmlmerge (#238)
qimingj Feb 25, 2017
ff36026
Merge branch 'cloudml'
qimingj Feb 25, 2017
45bc56f
Merge branch 'master' into cloudmlmerge
Feb 26, 2017
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 7 additions & 13 deletions datalab/mlalpha/__init__.py → datalab/ml/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,20 +14,14 @@

from __future__ import absolute_import

from ._local_runner import LocalRunner
from ._cloud_runner import CloudRunner
from ._metadata import Metadata
from ._local_predictor import LocalPredictor
from ._cloud_predictor import CloudPredictor
from ._job import Jobs
from ._job import Jobs, Job
from ._summary import Summary
from ._tensorboard import TensorBoardManager
from ._dataset import DataSet
from ._package import Packager
from ._cloud_models import CloudModels, CloudModelVersions
from ._tensorboard import TensorBoard
from ._dataset import CsvDataSet, BigQueryDataSet
from ._cloud_models import Models, ModelVersions
from ._confusion_matrix import ConfusionMatrix
from ._feature_slice_view import FeatureSliceView
from ._cloud_training_config import CloudTrainingConfig
from ._util import *

from plotly.offline import init_notebook_mode

init_notebook_mode()

274 changes: 274 additions & 0 deletions datalab/ml/_cloud_models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,274 @@
# Copyright 2016 Google Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing permissions and limitations under
# the License.

"""Implements Cloud ML Model Operations"""

from googleapiclient import discovery
import os
import yaml

import datalab.context
import datalab.storage
import datalab.utils

from . import _util

class Models(object):
  """Represents the collection of Cloud ML models for a project."""

  def __init__(self, project_id=None):
    """
    Args:
      project_id: project_id of the models. If not provided, default project_id will be used.
    """
    if project_id is None:
      project_id = datalab.context.Context.default().project_id
    self._project_id = project_id
    self._credentials = datalab.context.Context.default().credentials
    self._api = discovery.build('ml', 'v1', credentials=self._credentials)

  def _retrieve_models(self, page_token, page_size):
    # Fetch one page of models; returns (items, next_page_token) as expected
    # by datalab.utils.Iterator.
    list_info = self._api.projects().models().list(
        parent='projects/' + self._project_id, pageToken=page_token, pageSize=page_size).execute()
    models = list_info.get('models', [])
    page_token = list_info.get('nextPageToken', None)
    return models, page_token

  def get_iterator(self):
    """Get iterator of models so it can be used as "for model in Models().get_iterator()".
    """
    return iter(datalab.utils.Iterator(self._retrieve_models))

  def get_model_details(self, model_name):
    """Get details of the specified model from CloudML Service.

    Args:
      model_name: the name of the model. It can be a model full name
          ("projects/[project_id]/models/[model_name]") or just [model_name].
    Returns: a dictionary of the model details.
    """
    full_name = model_name
    if not model_name.startswith('projects/'):
      full_name = ('projects/%s/models/%s' % (self._project_id, model_name))
    return self._api.projects().models().get(name=full_name).execute()

  def create(self, model_name):
    """Create a model.

    Args:
      model_name: the short name of the model, such as "iris".
    Returns:
      If successful, returns information of the model, such as
      {u'regions': [u'us-central1'], u'name': u'projects/myproject/models/mymodel'}
    Raises:
      Exception if the model creation failed.
    """
    body = {'name': model_name}
    parent = 'projects/' + self._project_id
    # Model creation is instant. If anything goes wrong, Exception will be thrown.
    return self._api.projects().models().create(body=body, parent=parent).execute()

  def delete(self, model_name):
    """Delete a model.

    Args:
      model_name: the name of the model. It can be a model full name
          ("projects/[project_id]/models/[model_name]") or just [model_name].
    """
    full_name = model_name
    if not model_name.startswith('projects/'):
      full_name = ('projects/%s/models/%s' % (self._project_id, model_name))
    response = self._api.projects().models().delete(name=full_name).execute()
    if 'name' not in response:
      raise Exception('Invalid response from service. "name" is not found.')
    # Deletion is a long-running operation; block until it finishes.
    _util.wait_for_long_running_operation(response['name'])

  def list(self, count=10):
    """List models under the current project in a table view.

    Args:
      count: upper limit of the number of models to list.
    Raises:
      Exception if it is called in a non-IPython environment.
    """
    import IPython
    data = []
    # zip() stops either when "count" models were collected or the iterator is
    # exhausted. (Bug fix: the original iterated "self", but this class defines
    # no __iter__, which would raise TypeError.)
    for _, model in zip(range(count), self.get_iterator()):
      element = {'name': model['name']}
      if 'defaultVersion' in model:
        version_short_name = model['defaultVersion']['name'].split('/')[-1]
        element['defaultVersion'] = version_short_name
      data.append(element)

    IPython.display.display(
        datalab.utils.commands.render_dictionary(data, ['name', 'defaultVersion']))

  def describe(self, model_name):
    """Print information of a specified model.

    Args:
      model_name: the name of the model to print details on.
    """
    model_yaml = yaml.safe_dump(self.get_model_details(model_name), default_flow_style=False)
    # print() works on both Python 2 and 3 (original used a py2 print statement).
    print(model_yaml)


class ModelVersions(object):
  """Represents a list of versions for a Cloud ML model."""

  def __init__(self, model_name, project_id=None):
    """
    Args:
      model_name: the name of the model. It can be a model full name
          ("projects/[project_id]/models/[model_name]") or just [model_name].
      project_id: project_id of the models. If not provided and model_name is not a full name
          (not including project_id), default project_id will be used.
    """
    if project_id is None:
      project_id = datalab.context.Context.default().project_id
    # Bug fix: always record the project id. The original assigned
    # self._project_id only when project_id was None, so passing an explicit
    # project_id left the attribute unset and deploy() failed with
    # AttributeError.
    self._project_id = project_id
    self._credentials = datalab.context.Context.default().credentials
    self._api = discovery.build('ml', 'v1', credentials=self._credentials)
    if not model_name.startswith('projects/'):
      model_name = ('projects/%s/models/%s' % (self._project_id, model_name))
    self._full_model_name = model_name
    self._model_name = self._full_model_name.split('/')[-1]

  def _retrieve_versions(self, page_token, page_size):
    # Fetch one page of versions; returns (items, next_page_token) as expected
    # by datalab.utils.Iterator.
    parent = self._full_model_name
    list_info = self._api.projects().models().versions().list(parent=parent,
        pageToken=page_token, pageSize=page_size).execute()
    versions = list_info.get('versions', [])
    page_token = list_info.get('nextPageToken', None)
    return versions, page_token

  def get_iterator(self):
    """Get iterator of versions so it can be used as
       "for v in ModelVersions(model_name).get_iterator()".
    """
    return iter(datalab.utils.Iterator(self._retrieve_versions))

  def get_version_details(self, version_name):
    """Get details of a version.

    Args:
      version_name: the name of the version in short form, such as "v1".
    Returns: a dictionary containing the version details.
    """
    name = ('%s/versions/%s' % (self._full_model_name, version_name))
    return self._api.projects().models().versions().get(name=name).execute()

  def deploy(self, version_name, path):
    """Deploy a model version to the cloud.

    Args:
      version_name: the name of the version in short form, such as "v1".
      path: the Google Cloud Storage path (gs://...) which contains the model files.

    Raises: Exception if the path is invalid or does not contain expected files.
            Exception if the service returns invalid response.
    """
    if not path.startswith('gs://'):
      raise Exception('Invalid path. Only Google Cloud Storage path (gs://...) is accepted.')

    # If there is no "export.meta" or "saved_model.pb" under path but there is
    # path/model/export.meta or path/model/saved_model.pb, then append /model to the path.
    if (not datalab.storage.Item.from_url(os.path.join(path, 'export.meta')).exists() and
        not datalab.storage.Item.from_url(os.path.join(path, 'saved_model.pb')).exists()):
      if (datalab.storage.Item.from_url(os.path.join(path, 'model', 'export.meta')).exists() or
          datalab.storage.Item.from_url(os.path.join(path, 'model', 'saved_model.pb')).exists()):
        path = os.path.join(path, 'model')
      else:
        print('Cannot find export.meta or saved_model.pb, but continue with deployment anyway.')

    body = {'name': self._model_name}
    parent = 'projects/' + self._project_id
    try:
      self._api.projects().models().create(body=body, parent=parent).execute()
    except Exception:
      # Trying to create an already existing model gets an error. Ignore it.
      pass
    body = {
        'name': version_name,
        'deployment_uri': path,
        'runtime_version': '1.0',
    }
    response = self._api.projects().models().versions().create(body=body,
        parent=self._full_model_name).execute()
    if 'name' not in response:
      raise Exception('Invalid response from service. "name" is not found.')
    # Version creation is a long-running operation; block until it finishes.
    _util.wait_for_long_running_operation(response['name'])

  def delete(self, version_name):
    """Delete a version of model.

    Args:
      version_name: the name of the version in short form, such as "v1".
    """
    name = ('%s/versions/%s' % (self._full_model_name, version_name))
    response = self._api.projects().models().versions().delete(name=name).execute()
    if 'name' not in response:
      raise Exception('Invalid response from service. "name" is not found.')
    _util.wait_for_long_running_operation(response['name'])

  def predict(self, version_name, data):
    """Get prediction results from features instances.

    Args:
      version_name: the name of the version used for prediction.
      data: typically a list of instance to be submitted for prediction. The format of the
          instance depends on the model. For example, structured data model may require
          a csv line for each instance.
          Note that online prediction only works on models that take one placeholder value,
          such as a string encoding a csv line.
    Returns:
      A list of prediction results for given instances. Each element is a dictionary representing
          output mapping from the graph.
      An example:
        [{"predictions": 1, "score": [0.00078, 0.71406, 0.28515]},
         {"predictions": 1, "score": [0.00244, 0.99634, 0.00121]}]
    """
    full_version_name = ('%s/versions/%s' % (self._full_model_name, version_name))
    request = self._api.projects().predict(body={'instances': data},
                                           name=full_version_name)
    request.headers['user-agent'] = 'GoogleCloudDataLab/1.0'
    result = request.execute()
    if 'predictions' not in result:
      raise Exception('Invalid response from service. Cannot find "predictions" in response.')

    return result['predictions']

  def describe(self, version_name):
    """Print information of a specified model version.

    Args:
      version_name: the name of the version in short form, such as "v1".
    """
    version_yaml = yaml.safe_dump(self.get_version_details(version_name),
                                  default_flow_style=False)
    # print() works on both Python 2 and 3 (original used a py2 print statement).
    print(version_yaml)

  def list(self):
    """List versions under the current model in a table view.

    Raises:
      Exception if it is called in a non-IPython environment.
    """
    import IPython

    # Bug fixes: iterate get_iterator() (this class defines no __iter__), and
    # split the full version name on '/' — the original split on whitespace,
    # which returned the whole full name instead of the short name.
    data = [{'name': version['name'].split('/')[-1],
             'deploymentUri': version['deploymentUri'], 'createTime': version['createTime']}
            for version in self.get_iterator()]
    IPython.display.display(
        datalab.utils.commands.render_dictionary(data, ['name', 'deploymentUri', 'createTime']))
47 changes: 47 additions & 0 deletions datalab/ml/_cloud_training_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Copyright 2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from collections import namedtuple

# Base namedtuple: "region" is the only required field; everything else
# defaults to a value ("BASIC" scale tier, otherwise unset).
_CloudTrainingConfig = namedtuple(
    "CloudConfig",
    ['region', 'scale_tier', 'master_type', 'worker_type',
     'parameter_server_type', 'worker_count', 'parameter_server_count'])
_CloudTrainingConfig.__new__.__defaults__ = ('BASIC',) + (None,) * 5


class CloudTrainingConfig(_CloudTrainingConfig):
  """A config namedtuple containing cloud specific configurations for CloudML training.

  Fields:
    region: the region of the training job to be submitted. For example, "us-central1".
        Run "gcloud compute regions list" to get a list of regions.
    scale_tier: Specifies the machine types, the number of replicas for workers and
        parameter servers. For example, "STANDARD_1". See
        https://cloud.google.com/ml/reference/rest/v1beta1/projects.jobs#scaletier
        for list of accepted values. Defaults to "BASIC".
    master_type: the type of virtual machine for the training job's master worker.
        Required whenever scale_tier is CUSTOM. See the link in "scale_tier".
    worker_type: the type of virtual machine for the training job's worker nodes.
        Required whenever scale_tier is CUSTOM.
    parameter_server_type: the type of virtual machine for the training job's
        parameter server. Required whenever scale_tier is CUSTOM.
    worker_count: how many worker replicas the training job uses; each replica has
        the machine type given in "worker_type". Required whenever scale_tier is CUSTOM.
    parameter_server_count: how many parameter server replicas the training job uses;
        each replica has the machine type given in "parameter_server_type".
        Required whenever scale_tier is CUSTOM.
  """
Loading