Introduce `_is_available_on_pypi` and use it to determine whether we need to use `--no-conda` for `pyfunc_serve_and_score_model` (mlflow#4627)

* Introduce _is_available_on_pypi

Signed-off-by: harupy <hkawamura0130@gmail.com>

* Address comments

Signed-off-by: harupy <hkawamura0130@gmail.com>

* print torch version

Signed-off-by: harupy <hkawamura0130@gmail.com>

* update

Signed-off-by: harupy <hkawamura0130@gmail.com>

* remove print

Signed-off-by: harupy <hkawamura0130@gmail.com>

* Fix _is_available_on_pypi

Signed-off-by: harupy <hkawamura0130@gmail.com>

* Install libopenblas-dev

Signed-off-by: harupy <hkawamura0130@gmail.com>

* add pytest

Signed-off-by: harupy <hkawamura0130@gmail.com>

* Add test_pyfunc_serve_and_score

Signed-off-by: harupy <hkawamura0130@gmail.com>

* Fix test_pyfunc_serve_and_score

Signed-off-by: harupy <hkawamura0130@gmail.com>

* tensorflow

Signed-off-by: harupy <hkawamura0130@gmail.com>

* fix lambda

Signed-off-by: harupy <hkawamura0130@gmail.com>

* comment

Signed-off-by: harupy <hkawamura0130@gmail.com>

* Install libopenblas-dev

Signed-off-by: harupy <hkawamura0130@gmail.com>

* update master.yml

Signed-off-by: harupy <hkawamura0130@gmail.com>

* specify extra_args

Signed-off-by: harupy <hkawamura0130@gmail.com>

* remove blank lines

Signed-off-by: harupy <hkawamura0130@gmail.com>

* Fix package name

Signed-off-by: harupy <hkawamura0130@gmail.com>

* remove extra_pip_requirements

Signed-off-by: harupy <hkawamura0130@gmail.com>

* Fix tests/sklearn/test_sklearn_model_export.py

Signed-off-by: harupy <hkawamura0130@gmail.com>

* spacy

Signed-off-by: harupy <hkawamura0130@gmail.com>

* lint

Signed-off-by: harupy <hkawamura0130@gmail.com>
harupy authored Aug 4, 2021
1 parent b1b24dc commit 71141c6
Showing 14 changed files with 153 additions and 15 deletions.
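
In short, each flavor test module now computes the extra serving arguments once, at import time, and every `pyfunc_serve_and_score_model` call passes them through. A minimal sketch of the pattern, using `lightgbm` as the example package (the explanatory comments are editorial, not part of the diff):

```python
from tests.helper_functions import _is_available_on_pypi

# If the installed version of the flavor package exists on PyPI, the scoring
# server can rebuild the model's environment from its pip requirements inside
# a fresh conda environment. If it does not (e.g. a dev build that has not
# been released yet), pass --no-conda so the model is scored in the current
# environment instead.
EXTRA_PYFUNC_SERVING_TEST_ARGS = [] if _is_available_on_pypi("lightgbm") else ["--no-conda"]
```

The per-flavor diffs below then replace the hard-coded `extra_args=["--no-conda"]` with `extra_args=EXTRA_PYFUNC_SERVING_TEST_ARGS`.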
2 changes: 2 additions & 0 deletions .github/workflows/master.yml
@@ -314,6 +314,8 @@ jobs:
run: |
export PATH="$CONDA_DIR/bin:$PATH"
source activate test-environment
# Install libopenblas-dev for mxnet 1.8.0.post0
sudo apt-get install libopenblas-dev
./dev/run-python-flavor-tests.sh;
import:
2 changes: 2 additions & 0 deletions mlflow/ml-package-versions.yml
@@ -187,6 +187,8 @@ gluon:
maximum: "1.8.0.post0"
unsupported: ["1.8.0"] # MXNet 1.8.0 is a flawed release that we don't expect to work with
run: |
# Install libopenblas-dev for mxnet 1.8.0.post0
sudo apt-get install libopenblas-dev
pytest tests/gluon/test_gluon_model_export.py --large
autologging:
3 changes: 3 additions & 0 deletions tests/catboost/test_catboost_model_export.py
@@ -26,8 +26,11 @@
pyfunc_serve_and_score_model,
_compare_conda_env_requirements,
_assert_pip_requirements,
_is_available_on_pypi,
)

EXTRA_PYFUNC_SERVING_TEST_ARGS = [] if _is_available_on_pypi("catboost") else ["--no-conda"]

ModelWithData = namedtuple("ModelWithData", ["model", "inference_dataframe"])


6 changes: 5 additions & 1 deletion tests/gluon/test_gluon_model_export.py
@@ -29,6 +29,7 @@
pyfunc_serve_and_score_model,
_compare_conda_env_requirements,
_assert_pip_requirements,
_is_available_on_pypi,
)

if Version(mx.__version__) >= Version("2.0.0"):
@@ -41,6 +42,9 @@
array_module = mx.nd


EXTRA_PYFUNC_SERVING_TEST_ARGS = [] if _is_available_on_pypi("mxnet") else ["--no-conda"]


@pytest.fixture
def model_path(tmpdir):
return os.path.join(tmpdir.strpath, "model")
@@ -309,7 +313,7 @@ def test_gluon_model_serving_and_scoring_as_pyfunc(gluon_model, model_data):
model_uri=model_uri,
data=pd.DataFrame(test_data.asnumpy()),
content_type=pyfunc_scoring_server.CONTENT_TYPE_JSON_SPLIT_ORIENTED,
extra_args=["--no-conda"],
extra_args=EXTRA_PYFUNC_SERVING_TEST_ARGS,
)
response_values = pd.read_json(scoring_response.content, orient="records").values.astype(
np.float32
27 changes: 27 additions & 0 deletions tests/helper_functions.py
@@ -21,6 +21,7 @@
from mlflow.tracking.artifact_utils import _download_artifact_from_uri
from mlflow.utils.file_utils import read_yaml, write_yaml
from mlflow.utils.environment import _get_pip_deps, _CONSTRAINTS_FILE_NAME
from mlflow.utils.requirements_utils import _strip_local_version_identifier, _get_installed_version

LOCALHOST = "127.0.0.1"

@@ -348,3 +349,29 @@ def _assert_pip_requirements(model_uri, requirements, constraints=None):
assert f"-c {_CONSTRAINTS_FILE_NAME}" in conda_reqs
cons = _read_lines(os.path.join(local_path, _CONSTRAINTS_FILE_NAME))
assert cons == constraints


def _is_available_on_pypi(package, version=None, module=None):
"""
Returns True if the specified package version is available on PyPI.
:param package: The name of the package.
:param version: The version of the package. If None, defaults to the installed version.
:param module: The name of the top-level module provided by the package. For example,
if `package` is 'scikit-learn', `module` should be 'sklearn'. If None, defaults
to `package`.
"""
resp = requests.get("https://pypi.python.org/pypi/{}/json".format(package))
if not resp.ok:
return False

module = module or package
version = version or _get_installed_version(module)
version = _strip_local_version_identifier(version)

dist_files = resp.json()["releases"].get(version)
return (
dist_files is not None # specified version exists
and (len(dist_files) > 0) # at least one distribution file exists
and not dist_files[0].get("yanked", False) # specified version is not yanked
)
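
A quick usage sketch of the helper (the calls and versions below are illustrative; `_strip_local_version_identifier` is assumed to drop a PEP 440 local version segment such as the `+cpu` in `1.9.0+cpu`):

```python
# Package name and importable module can differ; module= controls which
# installed distribution the default version is read from.
_is_available_on_pypi("scikit-learn", module="sklearn")  # True for a released install

# An unreleased dev build has no distribution files on PyPI, so this returns
# False and callers fall back to ["--no-conda"].
_is_available_on_pypi("mlflow", version="2.0.0.dev0")  # False

# A locally built torch such as "1.9.0+cpu" is stripped to "1.9.0" before the
# PyPI lookup, so a released base version still returns True.
```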
7 changes: 5 additions & 2 deletions tests/keras/test_keras_model_export.py
@@ -41,12 +41,15 @@
score_model_in_sagemaker_docker_container,
_compare_conda_env_requirements,
_assert_pip_requirements,
_is_available_on_pypi,
)
from tests.helper_functions import set_boto_credentials # pylint: disable=unused-import
from tests.helper_functions import mock_s3_bucket # pylint: disable=unused-import
from tests.pyfunc.test_spark import score_model_as_udf
from mlflow.tracking._model_registry import DEFAULT_AWAIT_MAX_SLEEP_SECONDS

EXTRA_PYFUNC_SERVING_TEST_ARGS = [] if _is_available_on_pypi("keras") else ["--no-conda"]


@pytest.fixture(scope="module", autouse=True)
def fix_random_seed():
@@ -261,7 +264,7 @@ def test_model_save_load(build_model, save_format, model_path, data):
model_uri=os.path.abspath(model_path),
data=pd.DataFrame(x),
content_type=pyfunc_scoring_server.CONTENT_TYPE_JSON_SPLIT_ORIENTED,
extra_args=["--no-conda"],
extra_args=EXTRA_PYFUNC_SERVING_TEST_ARGS,
)
print(scoring_response.content)
actual_scoring_response = pd.read_json(
@@ -309,7 +312,7 @@ def test_custom_model_save_load(custom_model, custom_layer, data, custom_predict
model_uri=os.path.abspath(model_path),
data=pd.DataFrame(x),
content_type=pyfunc_scoring_server.CONTENT_TYPE_JSON_SPLIT_ORIENTED,
extra_args=["--no-conda"],
extra_args=EXTRA_PYFUNC_SERVING_TEST_ARGS,
)
assert np.allclose(
pd.read_json(scoring_response.content, orient="records", encoding="utf8").values.astype(
4 changes: 4 additions & 0 deletions tests/lightgbm/test_lightgbm_model_export.py
@@ -31,8 +31,11 @@
pyfunc_serve_and_score_model,
_compare_conda_env_requirements,
_assert_pip_requirements,
_is_available_on_pypi,
)

EXTRA_PYFUNC_SERVING_TEST_ARGS = [] if _is_available_on_pypi("lightgbm") else ["--no-conda"]

ModelWithData = namedtuple("ModelWithData", ["model", "inference_dataframe"])


@@ -379,6 +382,7 @@ def test_pyfunc_serve_and_score(lgb_model):
model_uri,
data=inference_dataframe,
content_type=pyfunc_scoring_server.CONTENT_TYPE_JSON_SPLIT_ORIENTED,
extra_args=EXTRA_PYFUNC_SERVING_TEST_ARGS,
)
scores = pd.read_json(resp.content, orient="records").values.squeeze()
np.testing.assert_array_almost_equal(scores, model.predict(inference_dataframe))
7 changes: 5 additions & 2 deletions tests/onnx/test_onnx_model_export.py
@@ -24,6 +24,7 @@
pyfunc_serve_and_score_model,
_compare_conda_env_requirements,
_assert_pip_requirements,
_is_available_on_pypi,
)
from mlflow.tracking.artifact_utils import _download_artifact_from_uri
from mlflow.utils.environment import _mlflow_conda_env
@@ -37,6 +38,8 @@
(sys.version_info < (3, 6)), reason="Tests require Python 3 to run!"
)

EXTRA_PYFUNC_SERVING_TEST_ARGS = [] if _is_available_on_pypi("onnx") else ["--no-conda"]


@pytest.fixture(scope="module")
def data():
@@ -277,7 +280,7 @@ def test_model_save_load_evaluate_pyfunc_format(onnx_model, model_path, data, pr
model_uri=os.path.abspath(model_path),
data=x,
content_type=pyfunc_scoring_server.CONTENT_TYPE_JSON_SPLIT_ORIENTED,
extra_args=["--no-conda"],
extra_args=EXTRA_PYFUNC_SERVING_TEST_ARGS,
)
assert np.allclose(
pd.read_json(scoring_response.content, orient="records")
@@ -319,7 +322,7 @@ def test_model_save_load_evaluate_pyfunc_format_multiple_inputs(
model_uri=os.path.abspath(model_path),
data=data_multiple_inputs,
content_type=pyfunc_scoring_server.CONTENT_TYPE_JSON_SPLIT_ORIENTED,
extra_args=["--no-conda"],
extra_args=EXTRA_PYFUNC_SERVING_TEST_ARGS,
)
assert np.allclose(
pd.read_json(scoring_response.content, orient="records").values,
37 changes: 28 additions & 9 deletions tests/pytorch/test_pytorch_model_export.py
@@ -30,7 +30,11 @@
from mlflow.utils.model_utils import _get_flavor_configuration
from mlflow.tracking._model_registry import DEFAULT_AWAIT_MAX_SLEEP_SECONDS

from tests.helper_functions import _compare_conda_env_requirements, _assert_pip_requirements
from tests.helper_functions import (
_compare_conda_env_requirements,
_assert_pip_requirements,
_is_available_on_pypi,
)

_logger = logging.getLogger(__name__)

@@ -47,6 +51,8 @@
"Failed to import test helper functions. Tests depending on these functions may fail!"
)

EXTRA_PYFUNC_SERVING_TEST_ARGS = [] if _is_available_on_pypi("torch") else ["--no-conda"]


@pytest.fixture(scope="module")
def data():
@@ -594,10 +600,7 @@ def test_pyfunc_model_serving_with_module_scoped_subclassed_model_and_default_co
module_scoped_subclassed_model, model_path, data
):
mlflow.pytorch.save_model(
path=model_path,
pytorch_model=module_scoped_subclassed_model,
conda_env=None,
code_paths=[__file__],
path=model_path, pytorch_model=module_scoped_subclassed_model, code_paths=[__file__],
)

scoring_response = pyfunc_serve_and_score_model(
@@ -668,10 +671,7 @@ def test_load_model_succeeds_with_dependencies_specified_via_code_paths(
# `tests` module is not available when the model is deployed for local scoring, we include
# the test suite file as a code dependency
mlflow.pytorch.save_model(
path=model_path,
pytorch_model=module_scoped_subclassed_model,
conda_env=None,
code_paths=[__file__],
path=model_path, pytorch_model=module_scoped_subclassed_model, code_paths=[__file__],
)

# Define a custom pyfunc model that loads a PyTorch model artifact using
@@ -907,6 +907,25 @@ def test_load_model_raises_exception_when_pickle_module_cannot_be_imported(
assert bad_pickle_module_name in str(exc_info)


@pytest.mark.large
def test_pyfunc_serve_and_score(data):
model = torch.nn.Linear(4, 1)
train_model(model=model, data=data)

with mlflow.start_run():
mlflow.pytorch.log_model(model, artifact_path="model")
model_uri = mlflow.get_artifact_uri("model")

resp = pyfunc_serve_and_score_model(
model_uri,
data[0],
pyfunc_scoring_server.CONTENT_TYPE_JSON_SPLIT_ORIENTED,
extra_args=EXTRA_PYFUNC_SERVING_TEST_ARGS,
)
scores = pd.DataFrame(json.loads(resp.content))
np.testing.assert_array_almost_equal(scores.values[:, 0], _predict(model=model, data=data))


@pytest.mark.release
def test_sagemaker_docker_model_scoring_with_sequential_model_and_default_conda_env(
model, model_path, data, sequential_predicted
6 changes: 6 additions & 0 deletions tests/sklearn/test_sklearn_model_export.py
@@ -38,6 +38,11 @@
pyfunc_serve_and_score_model,
_compare_conda_env_requirements,
_assert_pip_requirements,
_is_available_on_pypi,
)

EXTRA_PYFUNC_SERVING_TEST_ARGS = (
[] if _is_available_on_pypi("scikit-learn", module="sklearn") else ["--no-conda"]
)

ModelWithData = namedtuple("ModelWithData", ["model", "inference_data"])
@@ -661,6 +666,7 @@ def test_pyfunc_serve_and_score(sklearn_knn_model):
model_uri,
data=pd.DataFrame(inference_dataframe),
content_type=pyfunc_scoring_server.CONTENT_TYPE_JSON_SPLIT_ORIENTED,
extra_args=EXTRA_PYFUNC_SERVING_TEST_ARGS,
)
scores = pd.read_json(resp.content, orient="records").values.squeeze()
np.testing.assert_array_almost_equal(scores, model.predict(inference_dataframe))
28 changes: 27 additions & 1 deletion tests/spacy/test_spacy_model_export.py
@@ -13,6 +13,7 @@
from sklearn.datasets import fetch_20newsgroups

from mlflow import pyfunc
import mlflow.pyfunc.scoring_server as pyfunc_scoring_server
from mlflow.exceptions import MlflowException
from mlflow.models import Model, infer_signature
from mlflow.models.utils import _read_example
@@ -21,7 +22,14 @@
from mlflow.utils.file_utils import TempDir
from mlflow.utils.model_utils import _get_flavor_configuration
from tests.conftest import tracking_uri_mock # pylint: disable=unused-import, E0611
from tests.helper_functions import _compare_conda_env_requirements, _assert_pip_requirements
from tests.helper_functions import (
pyfunc_serve_and_score_model,
_compare_conda_env_requirements,
_assert_pip_requirements,
_is_available_on_pypi,
)

EXTRA_PYFUNC_SERVING_TEST_ARGS = [] if _is_available_on_pypi("spacy") else ["--no-conda"]

ModelWithData = namedtuple("ModelWithData", ["model", "inference_data"])

@@ -402,6 +410,24 @@ def test_model_log_without_pyfunc_flavor():
assert loaded_model.flavors.keys() == {"spacy"}


@pytest.mark.large
def test_pyfunc_serve_and_score(spacy_model_with_data):
model, inference_dataframe = spacy_model_with_data
artifact_path = "model"
with mlflow.start_run():
mlflow.spacy.log_model(model, artifact_path)
model_uri = mlflow.get_artifact_uri(artifact_path)

resp = pyfunc_serve_and_score_model(
model_uri,
data=inference_dataframe,
content_type=pyfunc_scoring_server.CONTENT_TYPE_JSON_SPLIT_ORIENTED,
extra_args=EXTRA_PYFUNC_SERVING_TEST_ARGS,
)
scores = pd.read_json(resp.content, orient="records")
pd.testing.assert_frame_equal(scores, _predict(model, inference_dataframe))


def _train_model(nlp, train_data, n_iter=5):
optimizer = nlp.begin_training()
batch_sizes = compounding(4.0, 32.0, 1.001)
3 changes: 3 additions & 0 deletions tests/statsmodels/test_statsmodels_model_export.py
@@ -25,6 +25,7 @@
pyfunc_serve_and_score_model,
_compare_conda_env_requirements,
_assert_pip_requirements,
_is_available_on_pypi,
)
from tests.helper_functions import mock_s3_bucket # pylint: disable=unused-import
from tests.helper_functions import set_boto_credentials # pylint: disable=unused-import
@@ -42,6 +43,7 @@
wls_model,
)

EXTRA_PYFUNC_SERVING_TEST_ARGS = [] if _is_available_on_pypi("statsmodels") else ["--no-conda"]

# The code in this file has been adapted from the test cases of the lightgbm flavor.

@@ -432,6 +434,7 @@ def test_pyfunc_serve_and_score(ols_model):
model_uri,
data=pd.DataFrame(inference_dataframe),
content_type=pyfunc_scoring_server.CONTENT_TYPE_JSON_SPLIT_ORIENTED,
extra_args=EXTRA_PYFUNC_SERVING_TEST_ARGS,
)
scores = pd.read_json(resp.content, orient="records").values.squeeze()
np.testing.assert_array_almost_equal(scores, model.predict(inference_dataframe))