From 10f95cde5e0282a99041ff2108111970f52379f3 Mon Sep 17 00:00:00 2001
From: Sara Robinson
Date: Wed, 20 Apr 2022 08:37:20 -0400
Subject: [PATCH] feat: add ModelEvaluation support (#1167)

---
 README.rst                                    |  41 ++++
 google/cloud/aiplatform/__init__.py           |   4 +-
 .../aiplatform/model_evaluation/__init__.py   |  20 ++
 .../model_evaluation/model_evaluation.py      |  93 +++++++++
 google/cloud/aiplatform/models.py             |  77 ++++++++
 .../unit/aiplatform/test_model_evaluation.py  | 186 ++++++++++++++++++
 tests/unit/aiplatform/test_models.py          | 115 +++++++++++
 7 files changed, 535 insertions(+), 1 deletion(-)
 create mode 100644 google/cloud/aiplatform/model_evaluation/__init__.py
 create mode 100644 google/cloud/aiplatform/model_evaluation/model_evaluation.py
 create mode 100644 tests/unit/aiplatform/test_model_evaluation.py

diff --git a/README.rst b/README.rst
index 6e4fccb6ae..fc961c63ef 100644
--- a/README.rst
+++ b/README.rst
@@ -283,6 +283,47 @@ Please visit `Importing models to Vertex AI`_ for a detailed overview:
 
 .. _Importing models to Vertex AI: https://cloud.google.com/vertex-ai/docs/general/import-model
 
+Model Evaluation
+----------------
+
+The Vertex AI SDK for Python currently supports getting model evaluation metrics for all AutoML models.
+
+To list all model evaluations for a model:
+
+.. code-block:: Python
+
+    model = aiplatform.Model('projects/my-project/locations/us-central1/models/{MODEL_ID}')
+
+    evaluations = model.list_model_evaluations()
+
+
+To get the model evaluation resource for a given model:
+
+.. code-block:: Python
+
+    model = aiplatform.Model('projects/my-project/locations/us-central1/models/{MODEL_ID}')
+
+    # Returns the first evaluation if no arguments are passed; you can also pass the evaluation ID
+    evaluation = model.get_model_evaluation()
+
+    eval_metrics = evaluation.metrics
+
+
+You can also create a reference to your model evaluation directly by passing in its resource name:
+
+.. code-block:: Python
+
+    evaluation = aiplatform.ModelEvaluation(
+        evaluation_name='projects/my-project/locations/us-central1/models/{MODEL_ID}/evaluations/{EVALUATION_ID}')
+
+Alternatively, you can create a reference to your evaluation by passing in the model and evaluation IDs:
+
+.. code-block:: Python
+
+    evaluation = aiplatform.ModelEvaluation(
+        evaluation_name={EVALUATION_ID},
+        model_id={MODEL_ID})
+
 Batch Prediction
 ----------------
 
diff --git a/google/cloud/aiplatform/__init__.py b/google/cloud/aiplatform/__init__.py
index 1defb5ad47..3f4d836678 100644
--- a/google/cloud/aiplatform/__init__.py
+++ b/google/cloud/aiplatform/__init__.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2020 Google LLC
+# Copyright 2022 Google LLC
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -41,6 +41,7 @@ from google.cloud.aiplatform.metadata import metadata from google.cloud.aiplatform.models import Endpoint from google.cloud.aiplatform.models import Model +from google.cloud.aiplatform.model_evaluation import ModelEvaluation from google.cloud.aiplatform.jobs import ( BatchPredictionJob, CustomJob, @@ -107,6 +108,7 @@ "ImageDataset", "HyperparameterTuningJob", "Model", + "ModelEvaluation", "PipelineJob", "TabularDataset", "Tensorboard", diff --git a/google/cloud/aiplatform/model_evaluation/__init__.py b/google/cloud/aiplatform/model_evaluation/__init__.py new file mode 100644 index 0000000000..7dcbee2db5 --- /dev/null +++ b/google/cloud/aiplatform/model_evaluation/__init__.py @@ -0,0 +1,20 @@ +# -*- coding: utf-8 -*- + +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from google.cloud.aiplatform.model_evaluation.model_evaluation import ModelEvaluation + +__all__ = ("ModelEvaluation",) diff --git a/google/cloud/aiplatform/model_evaluation/model_evaluation.py b/google/cloud/aiplatform/model_evaluation/model_evaluation.py new file mode 100644 index 0000000000..f8553b7644 --- /dev/null +++ b/google/cloud/aiplatform/model_evaluation/model_evaluation.py @@ -0,0 +1,93 @@ +# -*- coding: utf-8 -*- + +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from google.auth import credentials as auth_credentials + +from google.cloud.aiplatform import base +from google.cloud.aiplatform import utils +from google.cloud.aiplatform import models +from google.protobuf import struct_pb2 + +from typing import Optional + + +class ModelEvaluation(base.VertexAiResourceNounWithFutureManager): + + client_class = utils.ModelClientWithOverride + _resource_noun = "evaluations" + _delete_method = None + _getter_method = "get_model_evaluation" + _list_method = "list_model_evaluations" + _parse_resource_name_method = "parse_model_evaluation_path" + _format_resource_name_method = "model_evaluation_path" + + @property + def metrics(self) -> Optional[struct_pb2.Value]: + """Gets the evaluation metrics from the Model Evaluation. + Returns: + A dict with model metrics created from the Model Evaluation or + None if the metrics for this evaluation are empty. 
+ """ + return self._gca_resource.metrics + + def __init__( + self, + evaluation_name: str, + model_id: Optional[str] = None, + project: Optional[str] = None, + location: Optional[str] = None, + credentials: Optional[auth_credentials.Credentials] = None, + ): + """Retrieves the ModelEvaluation resource and instantiates its representation. + + Args: + evaluation_name (str): + Required. A fully-qualified model evaluation resource name or evaluation ID. + Example: "projects/123/locations/us-central1/models/456/evaluations/789" or + "789". If passing only the evaluation ID, model_id must be provided. + model_id (str): + Optional. The ID of the model to retrieve this evaluation from. If passing + only the evaluation ID as evaluation_name, model_id must be provided. + project (str): + Optional project to retrieve model evaluation from. If not set, project + set in aiplatform.init will be used. + location (str): + Optional location to retrieve model evaluation from. If not set, location + set in aiplatform.init will be used. + credentials: Optional[auth_credentials.Credentials]=None, + Custom credentials to use to retrieve this model evaluation. If not set, + credentials set in aiplatform.init will be used. + """ + + super().__init__( + project=project, + location=location, + credentials=credentials, + resource_name=evaluation_name, + ) + + self._gca_resource = self._get_gca_resource( + resource_name=evaluation_name, + parent_resource_name_fields={models.Model._resource_noun: model_id} + if model_id + else model_id, + ) + + def delete(self): + raise NotImplementedError( + "Deleting a model evaluation has not been implemented yet." + ) diff --git a/google/cloud/aiplatform/models.py b/google/cloud/aiplatform/models.py index c934a01eec..e344ad2d2c 100644 --- a/google/cloud/aiplatform/models.py +++ b/google/cloud/aiplatform/models.py @@ -33,6 +33,7 @@ from google.cloud.aiplatform import models from google.cloud.aiplatform import utils from google.cloud.aiplatform.utils import gcs_utils +from google.cloud.aiplatform import model_evaluation from google.cloud.aiplatform.compat.services import endpoint_service_client @@ -3210,3 +3211,79 @@ def upload_tensorflow_saved_model( sync=sync, upload_request_timeout=upload_request_timeout, ) + + def list_model_evaluations( + self, + ) -> List["model_evaluation.ModelEvaluation"]: + """List all Model Evaluation resources associated with this model. + + Example Usage: + + my_model = Model( + model_name="projects/123/locations/us-central1/models/456" + ) + + my_evaluations = my_model.list_model_evaluations() + + Returns: + List[model_evaluation.ModelEvaluation]: List of ModelEvaluation resources + for the model. + """ + + self.wait() + + return model_evaluation.ModelEvaluation._list( + parent=self.resource_name, + credentials=self.credentials, + ) + + def get_model_evaluation( + self, + evaluation_id: Optional[str] = None, + ) -> Optional[model_evaluation.ModelEvaluation]: + """Returns a ModelEvaluation resource and instantiates its representation. + If no evaluation_id is passed, it will return the first evaluation associated + with this model. + + Example usage: + + my_model = Model( + model_name="projects/123/locations/us-central1/models/456" + ) + + my_evaluation = my_model.get_model_evaluation( + evaluation_id="789" + ) + + # If no arguments are passed, this returns the first evaluation for the model + my_evaluation = my_model.get_model_evaluation() + + Args: + evaluation_id (str): + Optional. The ID of the model evaluation to retrieve. 
+ Returns: + model_evaluation.ModelEvaluation: Instantiated representation of the + ModelEvaluation resource. + """ + + evaluations = self.list_model_evaluations() + + if not evaluation_id: + if len(evaluations) > 1: + _LOGGER.warning( + f"Your model has more than one model evaluation, this is returning only one evaluation resource: {evaluations[0].resource_name}" + ) + return evaluations[0] if evaluations else evaluations + else: + resource_uri_parts = self._parse_resource_name(self.resource_name) + evaluation_resource_name = ( + model_evaluation.ModelEvaluation._format_resource_name( + **resource_uri_parts, + evaluation=evaluation_id, + ) + ) + + return model_evaluation.ModelEvaluation( + evaluation_name=evaluation_resource_name, + credentials=self.credentials, + ) diff --git a/tests/unit/aiplatform/test_model_evaluation.py b/tests/unit/aiplatform/test_model_evaluation.py new file mode 100644 index 0000000000..c5c5cd9ac3 --- /dev/null +++ b/tests/unit/aiplatform/test_model_evaluation.py @@ -0,0 +1,186 @@ +# -*- coding: utf-8 -*- + +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import pytest + +from unittest import mock + +from google.cloud import aiplatform +from google.cloud.aiplatform import base +from google.cloud.aiplatform import models + +from google.cloud.aiplatform_v1.services.model_service import ( + client as model_service_client, +) + + +from google.cloud.aiplatform.compat.types import model as gca_model + +from google.cloud.aiplatform_v1.types import model_evaluation as gca_model_evaluation + +_TEST_PROJECT = "test-project" +_TEST_LOCATION = "us-central1" +_TEST_MODEL_NAME = "test-model" +_TEST_MODEL_ID = "1028944691210842416" +_TEST_EVAL_ID = "1028944691210842622" + +_TEST_MODEL_RESOURCE_NAME = model_service_client.ModelServiceClient.model_path( + _TEST_PROJECT, _TEST_LOCATION, _TEST_MODEL_ID +) + +_TEST_MODEL_EVAL_RESOURCE_NAME = ( + model_service_client.ModelServiceClient.model_evaluation_path( + _TEST_PROJECT, + _TEST_LOCATION, + _TEST_MODEL_ID, + _TEST_EVAL_ID, + ) +) + +_TEST_MODEL_EVAL_METRICS = { + "auPrc": 0.80592036, + "auRoc": 0.8100363, + "logLoss": 0.53061414, + "confidenceMetrics": [ + { + "confidenceThreshold": -0.01, + "recall": 1.0, + "precision": 0.5, + "falsePositiveRate": 1.0, + "f1Score": 0.6666667, + "recallAt1": 1.0, + "precisionAt1": 0.5, + "falsePositiveRateAt1": 1.0, + "f1ScoreAt1": 0.6666667, + "truePositiveCount": "415", + "falsePositiveCount": "415", + }, + { + "recall": 1.0, + "precision": 0.5, + "falsePositiveRate": 1.0, + "f1Score": 0.6666667, + "recallAt1": 0.74216866, + "precisionAt1": 0.74216866, + "falsePositiveRateAt1": 0.25783134, + "f1ScoreAt1": 0.74216866, + "truePositiveCount": "415", + "falsePositiveCount": "415", + }, + ], +} + + +@pytest.fixture +def get_model_mock(): + with mock.patch.object( + model_service_client.ModelServiceClient, "get_model" + ) as get_model_mock: + get_model_mock.return_value = gca_model.Model( + display_name=_TEST_MODEL_NAME, + name=_TEST_MODEL_RESOURCE_NAME, + ) + + yield 
get_model_mock + + +@pytest.fixture +def mock_model(): + model = mock.MagicMock(models.Model) + model.name = _TEST_MODEL_ID + model._latest_future = None + model._exception = None + model._gca_resource = gca_model.Model( + display_name="test-eval-model", + description="This is the mock Model's description", + name=_TEST_MODEL_NAME, + ) + yield model + + +# ModelEvaluation mocks +@pytest.fixture +def mock_model_eval_get(): + with mock.patch.object( + model_service_client.ModelServiceClient, "get_model_evaluation" + ) as mock_get_model_eval: + mock_get_model_eval.return_value = gca_model_evaluation.ModelEvaluation( + name=_TEST_MODEL_EVAL_RESOURCE_NAME, + metrics=_TEST_MODEL_EVAL_METRICS, + ) + yield mock_get_model_eval + + +class TestModelEvaluation: + def test_init_model_evaluation_with_only_resource_name(self, mock_model_eval_get): + aiplatform.init(project=_TEST_PROJECT) + + aiplatform.ModelEvaluation(evaluation_name=_TEST_MODEL_EVAL_RESOURCE_NAME) + + mock_model_eval_get.assert_called_once_with( + name=_TEST_MODEL_EVAL_RESOURCE_NAME, retry=base._DEFAULT_RETRY + ) + + def test_init_model_evaluation_with_eval_id_and_model_id(self, mock_model_eval_get): + aiplatform.init(project=_TEST_PROJECT) + + aiplatform.ModelEvaluation( + evaluation_name=_TEST_EVAL_ID, model_id=_TEST_MODEL_ID + ) + + mock_model_eval_get.assert_called_once_with( + name=_TEST_MODEL_EVAL_RESOURCE_NAME, retry=base._DEFAULT_RETRY + ) + + def test_init_model_evaluatin_with_id_project_and_location( + self, mock_model_eval_get + ): + aiplatform.init(project=_TEST_PROJECT) + + aiplatform.ModelEvaluation( + evaluation_name=_TEST_MODEL_EVAL_RESOURCE_NAME, + project=_TEST_PROJECT, + location=_TEST_LOCATION, + ) + mock_model_eval_get.assert_called_once_with( + name=_TEST_MODEL_EVAL_RESOURCE_NAME, retry=base._DEFAULT_RETRY + ) + + def test_init_model_evaluation_with_invalid_evaluation_resource_raises( + self, mock_model_eval_get + ): + aiplatform.init(project=_TEST_PROJECT) + + with pytest.raises(ValueError): + aiplatform.ModelEvaluation(evaluation_name=_TEST_MODEL_RESOURCE_NAME) + + def test_get_model_evaluation_metrics(self, mock_model_eval_get): + aiplatform.init(project=_TEST_PROJECT) + + eval_metrics = aiplatform.ModelEvaluation( + evaluation_name=_TEST_MODEL_EVAL_RESOURCE_NAME + ).metrics + assert eval_metrics == _TEST_MODEL_EVAL_METRICS + + def test_no_delete_model_evaluation_method(self, mock_model_eval_get): + + my_eval = aiplatform.ModelEvaluation( + evaluation_name=_TEST_MODEL_EVAL_RESOURCE_NAME + ) + + with pytest.raises(NotImplementedError): + my_eval.delete() diff --git a/tests/unit/aiplatform/test_models.py b/tests/unit/aiplatform/test_models.py index 88d9be865f..0e1673b84b 100644 --- a/tests/unit/aiplatform/test_models.py +++ b/tests/unit/aiplatform/test_models.py @@ -50,6 +50,7 @@ explanation as gca_explanation, machine_resources as gca_machine_resources, model_service as gca_model_service, + model_evaluation as gca_model_evaluation, endpoint_service as gca_endpoint_service, encryption_spec as gca_encryption_spec, ) @@ -183,6 +184,53 @@ _TEST_SUPPORTED_EXPORT_FORMATS_UNSUPPORTED = [] _TEST_CONTAINER_REGISTRY_DESTINATION +# Model Evaluation +_TEST_MODEL_EVAL_RESOURCE_NAME = f"{_TEST_MODEL_RESOURCE_NAME}/evaluations/{_TEST_ID}" +_TEST_MODEL_EVAL_METRICS = { + "auPrc": 0.80592036, + "auRoc": 0.8100363, + "logLoss": 0.53061414, + "confidenceMetrics": [ + { + "confidenceThreshold": -0.01, + "recall": 1.0, + "precision": 0.5, + "falsePositiveRate": 1.0, + "f1Score": 0.6666667, + "recallAt1": 1.0, + "precisionAt1": 0.5, + 
"falsePositiveRateAt1": 1.0, + "f1ScoreAt1": 0.6666667, + "truePositiveCount": "415", + "falsePositiveCount": "415", + }, + { + "recall": 1.0, + "precision": 0.5, + "falsePositiveRate": 1.0, + "f1Score": 0.6666667, + "recallAt1": 0.74216866, + "precisionAt1": 0.74216866, + "falsePositiveRateAt1": 0.25783134, + "f1ScoreAt1": 0.74216866, + "truePositiveCount": "415", + "falsePositiveCount": "415", + }, + ], +} + +_TEST_MODEL_EVAL_LIST = [ + gca_model_evaluation.ModelEvaluation( + name=_TEST_MODEL_EVAL_RESOURCE_NAME, + ), + gca_model_evaluation.ModelEvaluation( + name=_TEST_MODEL_EVAL_RESOURCE_NAME, + ), + gca_model_evaluation.ModelEvaluation( + name=_TEST_MODEL_EVAL_RESOURCE_NAME, + ), +] + @pytest.fixture def mock_model(): @@ -470,6 +518,28 @@ def mock_storage_blob_upload_from_filename(): yield mock_blob_upload_from_filename +# ModelEvaluation mocks +@pytest.fixture +def mock_model_eval_get(): + with mock.patch.object( + model_service_client.ModelServiceClient, "get_model_evaluation" + ) as mock_get_model_eval: + mock_get_model_eval.return_value = gca_model_evaluation.ModelEvaluation( + name=_TEST_MODEL_EVAL_RESOURCE_NAME, + metrics=_TEST_MODEL_EVAL_METRICS, + ) + yield mock_get_model_eval + + +@pytest.fixture +def list_model_evaluations_mock(): + with mock.patch.object( + model_service_client.ModelServiceClient, "list_model_evaluations" + ) as list_model_evaluations_mock: + list_model_evaluations_mock.return_value = _TEST_MODEL_EVAL_LIST + yield list_model_evaluations_mock + + class TestModel: def setup_method(self): importlib.reload(initializer) @@ -1856,3 +1926,48 @@ def test_update(self, update_model_mock, get_model_mock): update_model_mock.assert_called_once_with( model=current_model_proto, update_mask=update_mask ) + + def test_get_model_evaluation_with_id( + self, + mock_model_eval_get, + get_model_mock, + list_model_evaluations_mock, + ): + test_model = models.Model(model_name=_TEST_MODEL_RESOURCE_NAME) + + test_model.get_model_evaluation(evaluation_id=_TEST_ID) + + mock_model_eval_get.assert_called_once_with( + name=_TEST_MODEL_EVAL_RESOURCE_NAME, retry=base._DEFAULT_RETRY + ) + + def test_get_model_evaluation_without_id( + self, + mock_model_eval_get, + get_model_mock, + list_model_evaluations_mock, + ): + test_model = models.Model(model_name=_TEST_MODEL_RESOURCE_NAME) + + test_model.get_model_evaluation() + + list_model_evaluations_mock.assert_called_once_with( + request={"parent": _TEST_MODEL_RESOURCE_NAME, "filter": None} + ) + + def test_list_model_evaluations( + self, + get_model_mock, + mock_model_eval_get, + list_model_evaluations_mock, + ): + + test_model = models.Model(model_name=_TEST_MODEL_RESOURCE_NAME) + + eval_list = test_model.list_model_evaluations() + + list_model_evaluations_mock.assert_called_once_with( + request={"parent": _TEST_MODEL_RESOURCE_NAME, "filter": None} + ) + + assert len(eval_list) == len(_TEST_MODEL_EVAL_LIST)