From ba065c3c624a9ad04cd5d144ef5141825ecbe57c Mon Sep 17 00:00:00 2001 From: Edward Oakes Date: Sat, 26 Oct 2024 18:07:15 -0500 Subject: [PATCH] [serve] Remove ability to specify `route_prefix` at deployment level (#48223) Removes the ability to specify `route_prefix` at the deployment level (which has been deprecated for a number of releases). Also rips out some vestigial code which was unused: - `Deployment._deploy` - `log_deployment_update_status` - `get_deployment` Signed-off-by: Edward Oakes Signed-off-by: mohitjain2504 --- .../doc_code/image_classifier_example.py | 4 +- .../serve/doc_code/translator_example.py | 4 +- .../src/main/java/io/ray/serve/api/Serve.java | 34 +++-- .../ray/serve/api/ServeControllerClient.java | 25 ++-- .../src/main/java/io/ray/serve/dag/Graph.java | 63 +--------- .../io/ray/serve/deployment/Deployment.java | 36 +----- .../serve/deployment/DeploymentCreator.java | 26 +--- .../CrossLanguageDeploymentTest.java | 10 +- .../serve/deployment/DeploymentGraphTest.java | 4 +- .../ray/serve/deployment/DeploymentTest.java | 4 +- .../serve/docdemo/ManagePythonDeployment.java | 2 +- .../io/ray/serve/repdemo/DeploymentDemo.java | 2 +- python/ray/serve/_private/api.py | 40 +----- .../ray/serve/_private/application_state.py | 24 ++-- python/ray/serve/_private/client.py | 57 ++------- python/ray/serve/_private/controller.py | 24 +--- python/ray/serve/_private/deploy_utils.py | 2 +- .../_private/deployment_function_node.py | 11 -- .../serve/_private/deployment_graph_build.py | 82 +------------ python/ray/serve/_private/utils.py | 22 ++++ python/ray/serve/api.py | 60 ++++----- python/ray/serve/deployment.py | 109 +++------------- python/ray/serve/schema.py | 33 +---- python/ray/serve/scripts.py | 16 +-- python/ray/serve/tests/BUILD | 2 - python/ray/serve/tests/test_api.py | 63 ++-------- python/ray/serve/tests/test_cli_2.py | 37 +----- python/ray/serve/tests/test_deploy.py | 4 - python/ray/serve/tests/test_deploy_2.py | 19 --- python/ray/serve/tests/test_deploy_app.py | 53 +------- .../tests/test_deployment_graph_ingress.py | 116 ------------------ .../ray/serve/tests/test_deployment_node.py | 82 ------------- python/ray/serve/tests/test_fastapi.py | 3 +- python/ray/serve/tests/test_http_routes.py | 45 ++++--- python/ray/serve/tests/test_standalone.py | 42 +------ python/ray/serve/tests/test_websockets.py | 5 +- .../tests/unit/test_application_state.py | 27 +--- .../serve/tests/unit/test_deployment_class.py | 2 - python/ray/serve/tests/unit/test_schema.py | 36 ------ .../workloads/torch_tune_serve_test.py | 5 +- 40 files changed, 193 insertions(+), 1042 deletions(-) delete mode 100644 python/ray/serve/tests/test_deployment_graph_ingress.py delete mode 100644 python/ray/serve/tests/test_deployment_node.py diff --git a/doc/source/serve/doc_code/image_classifier_example.py b/doc/source/serve/doc_code/image_classifier_example.py index 3f2fec9ef4ce..3e4cf1fe8bcf 100644 --- a/doc/source/serve/doc_code/image_classifier_example.py +++ b/doc/source/serve/doc_code/image_classifier_example.py @@ -35,7 +35,7 @@ async def __call__(self, req: starlette.requests.Request): return await self.classify(req["image_url"]) -app = ImageClassifier.options(route_prefix="/classify").bind(downloader.bind()) +app = ImageClassifier.bind(downloader.bind()) # __serve_example_end__ @@ -65,7 +65,7 @@ async def __call__(self, req: starlette.requests.Request): # __serve_example_modified_end__ -serve.run(app, name="app1") +serve.run(app, name="app1", route_prefix="/classify") # __request_begin__ bear_url = "https://cdn.britannica.com/41/156441-050-A4424AEC/Grizzly-bear-Jasper-National-Park-Canada-Alberta.jpg" # noqa resp = requests.post("http://localhost:8000/classify", json={"image_url": bear_url}) diff --git a/doc/source/serve/doc_code/translator_example.py b/doc/source/serve/doc_code/translator_example.py index 3159f613b848..602a58ba9655 100644 --- a/doc/source/serve/doc_code/translator_example.py +++ b/doc/source/serve/doc_code/translator_example.py @@ -21,11 +21,11 @@ async def __call__(self, req: starlette.requests.Request): return self.translate(req["text"]) -app = Translator.options(route_prefix="/translate").bind() +app = Translator.bind() # __serve_example_end__ -serve.run(app, name="app2") +serve.run(app, name="app2", route_prefix="/translate") # __request_begin__ text = "Hello, the weather is quite fine today!" diff --git a/java/serve/src/main/java/io/ray/serve/api/Serve.java b/java/serve/src/main/java/io/ray/serve/api/Serve.java index 38d38b2cfe85..759ff63dfade 100644 --- a/java/serve/src/main/java/io/ray/serve/api/Serve.java +++ b/java/serve/src/main/java/io/ray/serve/api/Serve.java @@ -297,8 +297,7 @@ public static Deployment getDeployment(String name) { name, deploymentRoute.getDeploymentInfo().getDeploymentConfig(), deploymentRoute.getDeploymentInfo().getReplicaConfig(), - deploymentRoute.getDeploymentInfo().getVersion(), - deploymentRoute.getRoute()); + deploymentRoute.getDeploymentInfo().getVersion()); } /** @@ -307,7 +306,7 @@ public static Deployment getDeployment(String name) { * @param target A Serve application returned by `Deployment.bind()`. * @return A handle that can be used to call the application. */ - public static Optional run(Application target) { + public static DeploymentHandle run(Application target) { return run(target, true, Constants.SERVE_DEFAULT_APP_NAME, null, null); } @@ -318,13 +317,11 @@ public static Optional run(Application target) { * @param blocking * @param name Application name. If not provided, this will be the only application running on the * cluster (it will delete all others). - * @param routePrefix Route prefix for HTTP requests. If not provided, it will use route_prefix of - * the ingress deployment. If specified neither as an argument nor in the ingress deployment, - * the route prefix will default to '/'. + * @param routePrefix Route prefix for HTTP requests. Defaults to '/'. * @param config * @return A handle that can be used to call the application. */ - public static Optional run( + public static DeploymentHandle run( Application target, boolean blocking, String name, @@ -335,19 +332,20 @@ public static Optional run( throw new RayServeException("Application name must a non-empty string."); } + if (StringUtils.isNotBlank(routePrefix)) { + Preconditions.checkArgument( + routePrefix.startsWith("/"), "The route_prefix must start with a forward slash ('/')"); + } else { + routePrefix = "/"; + } + ServeControllerClient client = serveStart(config); List deployments = Graph.build(target.getInternalDagNode(), name); - Deployment ingress = Graph.getAndValidateIngressDeployment(deployments); + Deployment ingressDeployment = deployments.get(deployments.size() - 1); for (Deployment deployment : deployments) { // Overwrite route prefix - if (StringUtils.isNotBlank(deployment.getRoutePrefix()) - && StringUtils.isNotBlank(routePrefix)) { - Preconditions.checkArgument( - routePrefix.startsWith("/"), "The route_prefix must start with a forward slash ('/')"); - deployment.setRoutePrefix(routePrefix); - } deployment .getDeploymentConfig() .setVersion( @@ -356,12 +354,8 @@ public static Optional run( : RandomStringUtils.randomAlphabetic(6)); } - client.deployApplication(name, deployments, blocking); - - return Optional.ofNullable(ingress) - .map( - ingressDeployment -> - client.getDeploymentHandle(ingressDeployment.getName(), name, true)); + client.deployApplication(name, routePrefix, deployments, ingressDeployment.getName(), blocking); + return client.getDeploymentHandle(ingressDeployment.getName(), name, true); } private static void init() { diff --git a/java/serve/src/main/java/io/ray/serve/api/ServeControllerClient.java b/java/serve/src/main/java/io/ray/serve/api/ServeControllerClient.java index fdbb37c5906f..654eecdbaea8 100644 --- a/java/serve/src/main/java/io/ray/serve/api/ServeControllerClient.java +++ b/java/serve/src/main/java/io/ray/serve/api/ServeControllerClient.java @@ -160,11 +160,19 @@ public BaseActorHandle getController() { /** * Deployment an application with deployment list. * - * @param name application name - * @param deployments deployment list + * @param name application name. + * @param routePrefix route prefix for the application. + * @param deployments deployment list. + * @param ingressDeploymentName name of the ingress deployment (the one that is exposed over + * HTTP). * @param blocking Wait for the applications to be deployed or not. */ - public void deployApplication(String name, List deployments, boolean blocking) { + public void deployApplication( + String name, + String routePrefix, + List deployments, + String ingressDeploymentName, + boolean blocking) { Object[] deploymentArgsArray = new Object[deployments.size()]; @@ -178,8 +186,8 @@ public void deployApplication(String name, List deployments, boolean ByteString.copyFrom(deployment.getDeploymentConfig().toProtoBytes())) .setIngress(deployment.isIngress()) .setDeployerJobId(Ray.getRuntimeContext().getCurrentJobId().toString()); - if (deployment.getRoutePrefix() != null) { - deploymentArgs.setRoutePrefix(deployment.getRoutePrefix()); + if (deployment.getName() == ingressDeploymentName) { + deploymentArgs.setRoutePrefix(routePrefix); } deploymentArgsArray[i] = deploymentArgs.build().toByteArray(); } @@ -195,7 +203,6 @@ public void deployApplication(String name, List deployments, boolean logDeploymentReady( deployment.getName(), deployment.getVersion(), - deployment.getUrl(), "component=serve deployment=" + deployment.getName()); } } @@ -238,13 +245,11 @@ private void waitForApplicationRunning(String name, Long timeoutS) { "Application {} did not become RUNNING after {}s.", name, timeoutS)); } - private void logDeploymentReady(String name, String version, String url, String tag) { - String urlPart = url != null ? MessageFormatter.format(" at `{}`", url) : ""; + private void logDeploymentReady(String name, String version, String tag) { LOGGER.info( - "Deployment '{}{}' is ready {}. {}", + "Deployment '{}{}' is ready. {}", name, StringUtils.isNotBlank(version) ? "':'" + version : "", - urlPart, tag); } diff --git a/java/serve/src/main/java/io/ray/serve/dag/Graph.java b/java/serve/src/main/java/io/ray/serve/dag/Graph.java index 8a999cfa8d94..7fe048ec4981 100644 --- a/java/serve/src/main/java/io/ray/serve/dag/Graph.java +++ b/java/serve/src/main/java/io/ray/serve/dag/Graph.java @@ -1,13 +1,9 @@ package io.ray.serve.dag; -import com.google.common.base.Preconditions; import io.ray.serve.deployment.Deployment; import io.ray.serve.handle.DeploymentHandle; -import io.ray.serve.util.CollectionUtil; import io.ray.serve.util.CommonUtil; import io.ray.serve.util.DAGUtil; -import io.ray.serve.util.MessageFormatter; -import java.util.ArrayList; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -19,37 +15,7 @@ public class Graph { public static List build(DAGNode rayDagRootNode, String name) { DAGNodeBase serveRootDag = rayDagRootNode.applyRecursive(node -> transformRayDagToServeDag(node, name)); - List deployments = extractDeployments(serveRootDag); - List deploymentsWithHttp = processIngressDeploymentInServeDag(deployments); - return deploymentsWithHttp; - } - - private static List processIngressDeploymentInServeDag(List deployments) { - if (CollectionUtil.isEmpty(deployments)) { - return deployments; - } - - Deployment ingressDeployment = deployments.get(deployments.size() - 1); - if (StringUtils.isBlank(ingressDeployment.getRoutePrefix()) - || StringUtils.equals( - ingressDeployment.getRoutePrefix(), "/" + ingressDeployment.getName())) { - ingressDeployment.setRoutePrefix("/"); - } - - for (int i = 0; i < deployments.size() - 1; i++) { - Deployment deployment = deployments.get(i); - Preconditions.checkArgument( - StringUtils.isBlank(deployment.getRoutePrefix()) - || StringUtils.equals(deployment.getRoutePrefix(), "/" + deployment.getName()), - MessageFormatter.format( - "Route prefix is only configurable on the ingress deployment. " - + "Please do not set non-default route prefix: " - + "{} on non-ingress deployment of the " - + "serve DAG. ", - deployment.getRoutePrefix())); - deployment.setRoutePrefix(null); - } - return deployments; + return extractDeployments(serveRootDag); } public static DAGNodeBase transformRayDagToServeDag(DAGNodeBase dagNode, String appName) { @@ -64,12 +30,6 @@ public static DAGNodeBase transformRayDagToServeDag(DAGNodeBase dagNode, String deploymentName = deploymentShell.getName(); } - String routePrefix = - StringUtils.isBlank(deploymentShell.getRoutePrefix()) - || !StringUtils.equals(deploymentShell.getRoutePrefix(), "/" + deploymentName) - ? deploymentShell.getRoutePrefix() - : "/" + deploymentName; - Object[] replacedDeploymentInitArgs = new Object[clsNode.getBoundArgs().length]; for (int i = 0; i < clsNode.getBoundArgs().length; i++) { replacedDeploymentInitArgs[i] = @@ -84,7 +44,6 @@ public static DAGNodeBase transformRayDagToServeDag(DAGNodeBase dagNode, String .setDeploymentDef(clsNode.getClassName()) .setName(deploymentName) .setInitArgs(replacedDeploymentInitArgs) - .setRoutePrefix(routePrefix) .create(false); return new DeploymentNode( @@ -111,26 +70,6 @@ public static List extractDeployments(DAGNodeBase rootNode) { return deployments.values().stream().collect(Collectors.toList()); } - public static Deployment getAndValidateIngressDeployment(List deployments) { - - List ingressDeployments = new ArrayList<>(); - for (Deployment deployment : deployments) { - if (StringUtils.isNotBlank(deployment.getRoutePrefix())) { - ingressDeployments.add(deployment); - } - } - - Preconditions.checkArgument( - ingressDeployments.size() == 1, - MessageFormatter.format( - "Only one deployment in an Serve Application or DAG can have non-None route prefix. {} ingress deployments found: {}", - ingressDeployments.size(), - ingressDeployments)); - - ingressDeployments.get(0).setIngress(true); - return ingressDeployments.get(0); - } - public static DeploymentHandle replaceWithHandle(DAGNode node) { if (node instanceof DeploymentNode) { DeploymentNode deploymentNode = (DeploymentNode) node; diff --git a/java/serve/src/main/java/io/ray/serve/deployment/Deployment.java b/java/serve/src/main/java/io/ray/serve/deployment/Deployment.java index ded2c0f3cad7..64fc4be0a6ad 100644 --- a/java/serve/src/main/java/io/ray/serve/deployment/Deployment.java +++ b/java/serve/src/main/java/io/ray/serve/deployment/Deployment.java @@ -10,7 +10,6 @@ import io.ray.serve.handle.DeploymentHandle; import java.util.HashMap; import java.util.Map; -import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -32,30 +31,12 @@ public class Deployment { private final String version; - private String routePrefix; - - private final String url; - private boolean ingress; // TODO support placement group. public Deployment( - String name, - DeploymentConfig deploymentConfig, - ReplicaConfig replicaConfig, - String version, - String routePrefix) { - - if (StringUtils.isNotBlank(routePrefix)) { - Preconditions.checkArgument(routePrefix.startsWith("/"), "route_prefix must start with '/'."); - Preconditions.checkArgument( - routePrefix.equals("/") || !routePrefix.endsWith("/"), - "route_prefix must not end with '/' unless it's the root."); - Preconditions.checkArgument( - !routePrefix.contains("{") && !routePrefix.contains("}"), - "route_prefix may not contain wildcards."); - } + String name, DeploymentConfig deploymentConfig, ReplicaConfig replicaConfig, String version) { Preconditions.checkArgument( version != null || deploymentConfig.getAutoscalingConfig() == null, @@ -65,8 +46,6 @@ public Deployment( this.version = version; this.deploymentConfig = deploymentConfig; this.replicaConfig = replicaConfig; - this.routePrefix = routePrefix; - this.url = routePrefix != null ? Serve.getGlobalClient().getRootUrl() + routePrefix : null; } /** @@ -97,7 +76,6 @@ public DeploymentCreator options() { .setVersion(this.version) .setNumReplicas(this.deploymentConfig.getNumReplicas()) .setInitArgs(this.replicaConfig.getInitArgs()) - .setRoutePrefix(this.routePrefix) .setRayActorOptions(this.replicaConfig.getRayActorOptions()) .setUserConfig(this.deploymentConfig.getUserConfig()) .setMaxOngoingRequests(this.deploymentConfig.getMaxOngoingRequests()) @@ -147,18 +125,6 @@ public String getVersion() { return version; } - public String getRoutePrefix() { - return routePrefix; - } - - public String getUrl() { - return url; - } - - public void setRoutePrefix(String routePrefix) { - this.routePrefix = routePrefix; - } - public boolean isIngress() { return ingress; } diff --git a/java/serve/src/main/java/io/ray/serve/deployment/DeploymentCreator.java b/java/serve/src/main/java/io/ray/serve/deployment/DeploymentCreator.java index 99009b98c427..1515d8eb0450 100644 --- a/java/serve/src/main/java/io/ray/serve/deployment/DeploymentCreator.java +++ b/java/serve/src/main/java/io/ray/serve/deployment/DeploymentCreator.java @@ -43,16 +43,6 @@ public class DeploymentCreator { */ private Object[] initArgs; - /** - * Requests to paths under this HTTP path prefix will be routed to this deployment. Defaults to - * '/{name}'. When set to 'None', no HTTP endpoint will be created. Routing is done based on - * longest-prefix match, so if you have deployment A with a prefix of '/a' and deployment B with a - * prefix of '/a/b', requests to '/a', '/a/', and '/a/c' go to A and requests to '/a/b', '/a/b/', - * and '/a/b/c' go to B. Routes must not end with a '/' unless they're the root (just '/'), which - * acts as a catch-all. - */ - @Deprecated private String routePrefix; - /** Options to be passed to the Ray actor constructor such as resource requirements. */ private Map rayActorOptions; @@ -97,10 +87,6 @@ public Deployment create(boolean check) { LOGGER.warn( "DeprecationWarning: `version` in `@serve.deployment` has been deprecated. Explicitly specifying version will raise an error in the future!"); } - if (routePrefix != null) { - LOGGER.warn( - "DeprecationWarning: `route_prefix` in `@serve.deployment` has been deprecated. To specify a route prefix for an application, pass it into `serve.run` instead."); - } DeploymentConfig deploymentConfig = new DeploymentConfig() @@ -120,8 +106,7 @@ public Deployment create(boolean check) { StringUtils.isNotBlank(name) ? name : CommonUtil.getDeploymentName(deploymentDef), deploymentConfig, replicaConfig, - version, - routePrefix); + version); } public Deployment create() { @@ -177,15 +162,6 @@ public DeploymentCreator setInitArgs(Object[] initArgs) { return this; } - public String getRoutePrefix() { - return routePrefix; - } - - public DeploymentCreator setRoutePrefix(String routePrefix) { - this.routePrefix = routePrefix; - return this; - } - public Map getRayActorOptions() { return rayActorOptions; } diff --git a/java/serve/src/test/java/io/ray/serve/deployment/CrossLanguageDeploymentTest.java b/java/serve/src/test/java/io/ray/serve/deployment/CrossLanguageDeploymentTest.java index 21cbf94ad946..d8ad835df017 100644 --- a/java/serve/src/test/java/io/ray/serve/deployment/CrossLanguageDeploymentTest.java +++ b/java/serve/src/test/java/io/ray/serve/deployment/CrossLanguageDeploymentTest.java @@ -56,7 +56,7 @@ public void createPyClassTest() { .setNumReplicas(1) .bind("28"); - DeploymentHandle handle = Serve.run(deployment).get(); + DeploymentHandle handle = Serve.run(deployment); Assert.assertEquals(handle.method("increase").remote("6").result(), "34"); } @@ -70,7 +70,7 @@ public void createPyClassWithObjectRefTest() { .setNumReplicas(1) .bind("28"); - DeploymentHandle handle = Serve.run(deployment).get(); + DeploymentHandle handle = Serve.run(deployment); ObjectRef numRef = Ray.put(10); Assert.assertEquals(handle.method("increase").remote(numRef).result(), "38"); } @@ -84,7 +84,7 @@ public void createPyMethodTest() { .setDeploymentDef(PYTHON_MODULE + ".echo_server") .setNumReplicas(1) .bind(); - DeploymentHandle handle = Serve.run(deployment).get(); + DeploymentHandle handle = Serve.run(deployment); Assert.assertEquals(handle.method("__call__").remote("6").result(), "6"); } @@ -97,7 +97,7 @@ public void createPyMethodWithObjectRefTest() { .setDeploymentDef(PYTHON_MODULE + ".echo_server") .setNumReplicas(1) .bind(); - DeploymentHandle handle = Serve.run(deployment).get(); + DeploymentHandle handle = Serve.run(deployment); ObjectRef numRef = Ray.put("10"); Assert.assertEquals(handle.method("__call__").remote(numRef).result(), "10"); } @@ -112,7 +112,7 @@ public void userConfigTest() throws InterruptedException { .setNumReplicas(1) .setUserConfig("1") .bind("28"); - DeploymentHandle handle = Serve.run(deployment).get(); + DeploymentHandle handle = Serve.run(deployment); Assert.assertEquals(handle.method("increase").remote("6").result(), "7"); // deployment.options().setUserConfig("3").create().deploy(true); // TimeUnit.SECONDS.sleep(20L); diff --git a/java/serve/src/test/java/io/ray/serve/deployment/DeploymentGraphTest.java b/java/serve/src/test/java/io/ray/serve/deployment/DeploymentGraphTest.java index 722bf4b7d687..159a4235ca2f 100644 --- a/java/serve/src/test/java/io/ray/serve/deployment/DeploymentGraphTest.java +++ b/java/serve/src/test/java/io/ray/serve/deployment/DeploymentGraphTest.java @@ -68,7 +68,7 @@ public void bindTest() { Application deployment = Serve.deployment().setDeploymentDef(Counter.class.getName()).setNumReplicas(1).bind("2"); - DeploymentHandle handle = Serve.run(deployment).get(); + DeploymentHandle handle = Serve.run(deployment); Assert.assertEquals(handle.remote("2").result(), "4"); } @@ -108,7 +108,7 @@ public void testPassHandle() { Application driver = Serve.deployment().setDeploymentDef(Combiner.class.getName()).bind(modelA, modelB); - DeploymentHandle handle = Serve.run(driver).get(); + DeploymentHandle handle = Serve.run(driver); Assert.assertEquals(handle.remote("test").result(), "A:test,B:test"); } diff --git a/java/serve/src/test/java/io/ray/serve/deployment/DeploymentTest.java b/java/serve/src/test/java/io/ray/serve/deployment/DeploymentTest.java index 649a318ec996..edd615169b13 100644 --- a/java/serve/src/test/java/io/ray/serve/deployment/DeploymentTest.java +++ b/java/serve/src/test/java/io/ray/serve/deployment/DeploymentTest.java @@ -31,7 +31,7 @@ public void deployTest() { .setNumReplicas(1) .setUserConfig("_test") .bind("echo_"); - DeploymentHandle handle = Serve.run(deployment).get(); + DeploymentHandle handle = Serve.run(deployment); Assert.assertEquals(handle.method("call").remote("6").result(), "echo_6_test"); Assert.assertTrue((boolean) handle.method("checkHealth").remote().result()); } @@ -100,7 +100,7 @@ public void autoScaleTest() { .setVersion("v1") .bind("echo_"); - DeploymentHandle handle = Serve.run(deployment).get(); + DeploymentHandle handle = Serve.run(deployment); Assert.assertEquals(handle.method("call").remote("6").result(), "echo_6_test"); } diff --git a/java/serve/src/test/java/io/ray/serve/docdemo/ManagePythonDeployment.java b/java/serve/src/test/java/io/ray/serve/docdemo/ManagePythonDeployment.java index ef822b73095c..4f1e8aeec7ad 100644 --- a/java/serve/src/test/java/io/ray/serve/docdemo/ManagePythonDeployment.java +++ b/java/serve/src/test/java/io/ray/serve/docdemo/ManagePythonDeployment.java @@ -23,7 +23,7 @@ public static void main(String[] args) { .setDeploymentDef("counter.Counter") .setNumReplicas(1) .bind("1"); - DeploymentHandle handle = Serve.run(deployment).get(); + DeploymentHandle handle = Serve.run(deployment); System.out.println(handle.method("increase").remote("2").result()); } diff --git a/java/serve/src/test/java/io/ray/serve/repdemo/DeploymentDemo.java b/java/serve/src/test/java/io/ray/serve/repdemo/DeploymentDemo.java index 116825501238..6eaed2afc1f6 100644 --- a/java/serve/src/test/java/io/ray/serve/repdemo/DeploymentDemo.java +++ b/java/serve/src/test/java/io/ray/serve/repdemo/DeploymentDemo.java @@ -18,7 +18,7 @@ public String call() { public static void main(String[] args) { Application deployment = Serve.deployment().setDeploymentDef(DeploymentDemo.class.getName()).bind(); - DeploymentHandle handle = Serve.run(deployment).get(); + DeploymentHandle handle = Serve.run(deployment); System.out.println(handle.remote().result()); } } diff --git a/python/ray/serve/_private/api.py b/python/ray/serve/_private/api.py index 0e4d25188fb6..97641dc47f32 100644 --- a/python/ray/serve/_private/api.py +++ b/python/ray/serve/_private/api.py @@ -20,51 +20,13 @@ from ray.serve._private.controller import ServeController from ray.serve.config import HTTPOptions, gRPCOptions from ray.serve.context import _get_global_client, _set_global_client -from ray.serve.deployment import Application, Deployment +from ray.serve.deployment import Application from ray.serve.exceptions import RayServeException from ray.serve.schema import LoggingConfig logger = logging.getLogger(SERVE_LOGGER_NAME) -def get_deployment(name: str, app_name: str = ""): - """Dynamically fetch a handle to a Deployment object. - - Args: - name: name of the deployment. This must have already been - deployed. - - Returns: - Deployment - """ - try: - ( - deployment_info, - route_prefix, - ) = _get_global_client().get_deployment_info(name, app_name) - except KeyError: - if len(app_name) == 0: - msg = ( - f"Deployment {name} was not found. Did you call Deployment.deploy()? " - "Note that `serve.get_deployment()` can only be used to fetch a " - "deployment that was deployed using the 1.x API `Deployment.deploy()`. " - "If you want to fetch a handle to an application deployed through " - "`serve.run` or through a Serve config, please use " - "`serve.get_app_handle()` instead." - ) - else: - msg = f"Deployment {name} in application {app_name} was not found." - raise KeyError(msg) - return Deployment( - name, - deployment_info.deployment_config, - deployment_info.replica_config, - version=deployment_info.version, - route_prefix=route_prefix, - _internal=True, - ) - - def _check_http_options( client: ServeControllerClient, http_options: Union[dict, HTTPOptions] ) -> None: diff --git a/python/ray/serve/_private/application_state.py b/python/ray/serve/_private/application_state.py index 50432b610733..55d273fd7cb4 100644 --- a/python/ray/serve/_private/application_state.py +++ b/python/ray/serve/_private/application_state.py @@ -37,6 +37,7 @@ DEFAULT, check_obj_ref_ready_nowait, override_runtime_envs_except_env_vars, + validate_route_prefix, ) from ray.serve.config import AutoscalingConfig from ray.serve.exceptions import RayServeException @@ -931,7 +932,10 @@ def deploy_app(self, name: str, deployment_args: List[Dict]) -> None: } for deploy_param in deployment_args: - deploy_app_prefix: str = deploy_param["route_prefix"] + deploy_app_prefix = deploy_param.get("route_prefix", None) + if deploy_app_prefix is None: + continue + app_name = live_route_prefixes.get(deploy_app_prefix) if app_name is not None: raise RayServeException( @@ -1139,9 +1143,6 @@ def build_serve_application( try: from ray.serve._private.api import call_app_builder_with_args_if_necessary from ray.serve._private.deployment_graph_build import build as pipeline_build - from ray.serve._private.deployment_graph_build import ( - get_and_validate_ingress_deployment, - ) # Import and build the application. args_info_str = f" with arguments {args}" if args else "" @@ -1149,7 +1150,7 @@ def build_serve_application( app = call_app_builder_with_args_if_necessary(import_attr(import_path), args) deployments = pipeline_build(app._get_internal_dag_node(), name) - ingress = get_and_validate_ingress_deployment(deployments) + ingress = deployments[-1] deploy_args_list = [] for deployment in deployments: @@ -1161,7 +1162,7 @@ def build_serve_application( ingress=is_ingress, deployment_config=deployment._deployment_config, version=code_version, - route_prefix=deployment.route_prefix, + route_prefix="/" if is_ingress else None, docs_path=deployment._docs_path, ) ) @@ -1213,6 +1214,11 @@ def override_deployment_info( options["max_ongoing_requests"] = options.get("max_ongoing_requests") deployment_name = options["name"] + if deployment_name not in deployment_infos: + raise ValueError( + f"Got config override for nonexistent deployment '{deployment_name}'" + ) + info = deployment_infos[deployment_name] original_options = info.deployment_config.dict() original_options["user_configured_option_names"].update(set(options)) @@ -1239,11 +1245,6 @@ def override_deployment_info( # What to pass to info.update override_options = dict() - # Override route prefix if specified in deployment config - deployment_route_prefix = options.pop("route_prefix", DEFAULT.VALUE) - if deployment_route_prefix is not DEFAULT.VALUE: - override_options["route_prefix"] = deployment_route_prefix - # Merge app-level and deployment-level runtime_envs. replica_config = info.replica_config app_runtime_env = override_config.runtime_env @@ -1306,6 +1307,7 @@ def override_deployment_info( # Overwrite ingress route prefix app_route_prefix = config_dict.get("route_prefix", DEFAULT.VALUE) + validate_route_prefix(app_route_prefix) for deployment in list(deployment_infos.values()): if ( app_route_prefix is not DEFAULT.VALUE diff --git a/python/ray/serve/_private/client.py b/python/ray/serve/_private/client.py index 390fe4580055..c5d6b90da6bb 100644 --- a/python/ray/serve/_private/client.py +++ b/python/ray/serve/_private/client.py @@ -2,7 +2,7 @@ import random import time from functools import wraps -from typing import Callable, Dict, Iterable, List, Optional, Tuple, Union +from typing import Callable, Dict, List, Optional, Tuple, Union import ray from ray.actor import ActorHandle @@ -251,8 +251,12 @@ def deploy_application( deployments: List[Dict], _blocking: bool = True, ): + ingress_route_prefix = None deployment_args_list = [] for deployment in deployments: + if deployment["ingress"]: + ingress_route_prefix = deployment["route_prefix"] + deployment_args = get_deploy_args( deployment["name"], replica_config=deployment["replica_config"], @@ -283,13 +287,11 @@ def deploy_application( ray.get(self._controller.deploy_application.remote(name, deployment_args_list)) if _blocking: self._wait_for_application_running(name) - for deployment in deployments: - deployment_name = deployment["name"] - tag = f"component=serve deployment={deployment_name}" - url = deployment["url"] - version = deployment["version"] - - self.log_deployment_ready(deployment_name, version, url, tag) + if ingress_route_prefix is not None: + url_part = " at " + self._root_url + ingress_route_prefix + else: + url_part = "" + logger.info(f"Application '{name}' is ready{url_part}.") @_ensure_connected def deploy_apps( @@ -363,15 +365,6 @@ def delete_all_apps(self, blocking: bool = True): all_apps.append(status.name) self.delete_apps(all_apps, blocking) - @_ensure_connected - def delete_deployments(self, names: Iterable[str], blocking: bool = True) -> None: - """Delete 1.x deployments.""" - - ray.get(self._controller.delete_deployments.remote(names)) - if blocking: - for name in names: - self._wait_for_deployment_deleted(name, "") - @_ensure_connected def get_deployment_info( self, name: str, app_name: str @@ -468,36 +461,6 @@ def get_handle( return handle - @_ensure_connected - def log_deployment_update_status( - self, name: str, version: str, updating: bool - ) -> str: - tag = f"component=serve deployment={name}" - - if updating: - msg = f"Updating deployment '{name}'" - if version is not None: - msg += f" to version '{version}'" - logger.info(f"{msg}. {tag}") - else: - logger.info( - f"Deployment '{name}' is already at version " - f"'{version}', not updating. {tag}" - ) - - return tag - - @_ensure_connected - def log_deployment_ready(self, name: str, version: str, url: str, tag: str) -> None: - if url is not None: - url_part = f" at `{url}`" - else: - url_part = "" - logger.info( - f"Deployment '{name}{':'+version if version else ''}' is ready" - f"{url_part}. {tag}" - ) - @_ensure_connected def record_multiplexed_replica_info(self, info: MultiplexedReplicaInfo): """Record multiplexed replica information for replica. diff --git a/python/ray/serve/_private/controller.py b/python/ray/serve/_private/controller.py index 5a1606d6f311..8eff4c80315a 100644 --- a/python/ray/serve/_private/controller.py +++ b/python/ray/serve/_private/controller.py @@ -48,7 +48,6 @@ from ray.serve._private.storage.kv_store import RayInternalKVStore from ray.serve._private.usage import ServeUsageTag from ray.serve._private.utils import ( - DEFAULT, call_function_from_import_path, get_all_live_placement_group_names, get_head_node_id, @@ -334,8 +333,8 @@ def get_all_endpoints_java(self) -> bytes: # NOTE(zcin): Java only supports 1.x deployments, so only return # a dictionary of deployment name -> endpoint info data = { - endpoint_tag.name: EndpointInfoProto(route=endppint_dict["route"]) - for endpoint_tag, endppint_dict in endpoints.items() + endpoint_tag.name: EndpointInfoProto(route=endpoint_dict["route"]) + for endpoint_tag, endpoint_dict in endpoints.items() } return EndpointSet(endpoints=data).SerializeToString() @@ -739,12 +738,12 @@ def deploy_application(self, name: str, deployment_args_list: List[bytes]) -> No "deployment_config_proto_bytes": deployment_args.deployment_config, "replica_config_proto_bytes": deployment_args.replica_config, "deployer_job_id": deployment_args.deployer_job_id, + "ingress": deployment_args.ingress, "route_prefix": ( deployment_args.route_prefix if deployment_args.HasField("route_prefix") else None ), - "ingress": deployment_args.ingress, "docs_path": ( deployment_args.docs_path if deployment_args.HasField("docs_path") @@ -787,14 +786,6 @@ def apply_config( self._target_capacity = config.target_capacity for app_config in config.applications: - for deployments in app_config.deployments: - if deployments.route_prefix != DEFAULT.VALUE: - logger.warning( - "Specifying route prefix for a deployment is deprecated. " - "Please specify route prefix at an application level in the " - "Serve config instead." - ) - # If the application logging config is not set, use the global logging # config. if app_config.logging_config is None and config.logging_config: @@ -874,9 +865,7 @@ def get_serve_instance_details(self) -> Dict: error messages, etc. Returns: - Dict that follows the format of the schema ServeInstanceDetails. Currently, - there is a value set for every field at all schema levels, except for the - route_prefix in the deployment_config for each deployment. + Dict that follows the format of the schema ServeInstanceDetails. """ http_config = self.get_http_config() @@ -913,10 +902,7 @@ def get_serve_instance_details(self) -> Dict: ) # NOTE(zcin): We use exclude_unset here because we explicitly and intentionally - # fill in all info that should be shown to users. Currently, every field is set - # except for the route_prefix in the deployment_config of each deployment, since - # route_prefix is set instead in each application. - # Eventually we want to remove route_prefix from DeploymentSchema. + # fill in all info that should be shown to users. http_options = HTTPOptionsSchema.parse_obj(http_config.dict(exclude_unset=True)) grpc_options = gRPCOptionsSchema.parse_obj(grpc_config.dict(exclude_unset=True)) return ServeInstanceDetails( diff --git a/python/ray/serve/_private/deploy_utils.py b/python/ray/serve/_private/deploy_utils.py index 24bfacde6450..405ab8f18909 100644 --- a/python/ray/serve/_private/deploy_utils.py +++ b/python/ray/serve/_private/deploy_utils.py @@ -67,10 +67,10 @@ def deploy_args_to_deployment_info( deployment_config_proto_bytes: bytes, replica_config_proto_bytes: bytes, deployer_job_id: Union[str, bytes], - route_prefix: Optional[str], docs_path: Optional[str], app_name: Optional[str] = None, ingress: bool = False, + route_prefix: Optional[str] = None, **kwargs, ) -> DeploymentInfo: """Takes deployment args passed to the controller after building an application and diff --git a/python/ray/serve/_private/deployment_function_node.py b/python/ray/serve/_private/deployment_function_node.py index c82f118c3bf9..031faf910e46 100644 --- a/python/ray/serve/_private/deployment_function_node.py +++ b/python/ray/serve/_private/deployment_function_node.py @@ -36,20 +36,9 @@ def __init__( ] deployment_shell = schema_to_deployment(deployment_schema) - # Set the route prefix, prefer the one user supplied, - # otherwise set it to /deployment_name - if ( - deployment_shell.route_prefix is None - or deployment_shell.route_prefix != f"/{deployment_shell.name}" - ): - route_prefix = deployment_shell.route_prefix - else: - route_prefix = f"/{deployment_name}" - self._deployment = deployment_shell.options( func_or_class=func_body, name=self._deployment_name, - route_prefix=route_prefix, _init_args=(), _init_kwargs={}, _internal=True, diff --git a/python/ray/serve/_private/deployment_graph_build.py b/python/ray/serve/_private/deployment_graph_build.py index c3c05af635ad..f5ef841c7e58 100644 --- a/python/ray/serve/_private/deployment_graph_build.py +++ b/python/ray/serve/_private/deployment_graph_build.py @@ -92,34 +92,8 @@ def build( "you're using the DAG API, the function should be bound to a DAGDriver." ) - # Validate and only expose HTTP for the endpoint. - deployments_with_http = process_ingress_deployment_in_serve_dag(deployments) - return deployments_with_http - - -def get_and_validate_ingress_deployment( - deployments: List[Deployment], -) -> Deployment: - """Validation for http route prefixes for a list of deployments in pipeline. - - Ensures: - 1) One and only one ingress deployment with given route prefix. - 2) All other not ingress deployments should have prefix of None. - """ - - ingress_deployments = [] - for deployment in deployments: - if deployment.route_prefix is not None: - ingress_deployments.append(deployment) - - if len(ingress_deployments) != 1: - raise ValueError( - "Only one deployment in an Serve Application or DAG can have " - f"non-None route prefix. {len(ingress_deployments)} ingress " - f"deployments found: {ingress_deployments}" - ) - - return ingress_deployments[0] + # The last deployment in the list is the ingress. + return deployments def transform_ray_dag_to_serve_dag( @@ -168,20 +142,9 @@ def transform_ray_dag_to_serve_dag( ): deployment_name = deployment_shell.name - # Set the route prefix, prefer the one user supplied, - # otherwise set it to /deployment_name - if ( - deployment_shell.route_prefix is None - or deployment_shell.route_prefix != f"/{deployment_shell.name}" - ): - route_prefix = deployment_shell.route_prefix - else: - route_prefix = f"/{deployment_name}" - deployment = deployment_shell.options( func_or_class=dag_node._body, name=deployment_name, - route_prefix=route_prefix, _init_args=replaced_deployment_init_args, _init_kwargs=replaced_deployment_init_kwargs, _internal=True, @@ -257,44 +220,3 @@ def extractor(dag_node): serve_dag_root.apply_recursive(extractor) return list(deployments.values()) - - -def process_ingress_deployment_in_serve_dag( - deployments: List[Deployment], -) -> List[Deployment]: - """Mark the last fetched deployment in a serve dag as exposed with default - prefix. - """ - if len(deployments) == 0: - return deployments - - # Last element of the list is the root deployment if it's applicable type - # that wraps an deployment, given Ray DAG traversal is done bottom-up. - ingress_deployment = deployments[-1] - if ingress_deployment.route_prefix in [None, f"/{ingress_deployment.name}"]: - # Override default prefix to "/" on the ingress deployment, if user - # didn't provide anything in particular. - new_ingress_deployment = ingress_deployment.options( - route_prefix="/", - _internal=True, - ) - deployments[-1] = new_ingress_deployment - - # Erase all non ingress deployment route prefix - for i, deployment in enumerate(deployments[:-1]): - if ( - deployment.route_prefix is not None - and deployment.route_prefix != f"/{deployment.name}" - ): - raise ValueError( - "Route prefix is only configurable on the ingress deployment. " - "Please do not set non-default route prefix: " - f"{deployment.route_prefix} on non-ingress deployment of the " - "serve DAG. " - ) - else: - # Erase all default prefix to None for non-ingress deployments to - # disable HTTP - deployments[i] = deployment.options(route_prefix=None, _internal=True) - - return deployments diff --git a/python/ray/serve/_private/utils.py b/python/ray/serve/_private/utils.py index 8bf7e2f9d859..78851fc419fe 100644 --- a/python/ray/serve/_private/utils.py +++ b/python/ray/serve/_private/utils.py @@ -578,3 +578,25 @@ def get_component_file_name( suffix=suffix, ) return file_name + + +def validate_route_prefix(route_prefix: Union[DEFAULT, None, str]): + if route_prefix is DEFAULT.VALUE or route_prefix is None: + return + + if not route_prefix.startswith("/"): + raise ValueError( + f"Invalid route_prefix '{route_prefix}', " + "must start with a forward slash ('/')." + ) + + if route_prefix != "/" and route_prefix.endswith("/"): + raise ValueError( + f"Invalid route_prefix '{route_prefix}', " + "may not end with a trailing '/'." + ) + + if "{" in route_prefix or "}" in route_prefix: + raise ValueError( + f"Invalid route_prefix '{route_prefix}', " "may not contain wildcards." + ) diff --git a/python/ray/serve/api.py b/python/ray/serve/api.py index d49f74a98fd7..512c328f134d 100644 --- a/python/ray/serve/api.py +++ b/python/ray/serve/api.py @@ -18,9 +18,6 @@ ) from ray.serve._private.constants import SERVE_DEFAULT_APP_NAME, SERVE_LOGGER_NAME from ray.serve._private.deployment_graph_build import build as pipeline_build -from ray.serve._private.deployment_graph_build import ( - get_and_validate_ingress_deployment, -) from ray.serve._private.http_util import ( ASGIAppReplicaWrapper, make_fastapi_class_based_view, @@ -32,6 +29,7 @@ ensure_serialization_context, extract_self_if_method_call, get_random_string, + validate_route_prefix, ) from ray.serve.config import ( AutoscalingConfig, @@ -282,8 +280,6 @@ class MyDeployment: this deployment. Defaults to 1. autoscaling_config: Parameters to configure autoscaling behavior. If this is set, `num_replicas` cannot be set. - route_prefix: [DEPRECATED] Route prefix should be set per-application - through `serve.run()` or the config file. ray_actor_options: Options to pass to the Ray Actor decorator, such as resource requirements. Valid options are: `accelerator_type`, `memory`, `num_cpus`, `num_gpus`, `resources`, and `runtime_env`. @@ -325,6 +321,11 @@ class MyDeployment: Returns: `Deployment` """ + if route_prefix is not DEFAULT.VALUE: + raise ValueError( + "`route_prefix` can no longer be specified at the deployment level. " + "Pass it to `serve.run` or in the application config instead." + ) if max_ongoing_requests is None: raise ValueError("`max_ongoing_requests` must be non-null, got None.") @@ -367,13 +368,6 @@ class MyDeployment: "Explicitly specifying version will raise an error in the future!" ) - if route_prefix is not DEFAULT.VALUE: - logger.warning( - "DeprecationWarning: `route_prefix` in `@serve.deployment` has been " - "deprecated. To specify a route prefix for an application, pass it into " - "`serve.run` instead." - ) - if isinstance(logging_config, LoggingConfig): logging_config = logging_config.dict() @@ -421,7 +415,6 @@ def decorator(_func_or_class): deployment_config, replica_config, version=(version if version is not DEFAULT.VALUE else None), - route_prefix=route_prefix, _internal=True, ) @@ -435,7 +428,7 @@ def _run( target: Application, _blocking: bool = True, name: str = SERVE_DEFAULT_APP_NAME, - route_prefix: str = DEFAULT.VALUE, + route_prefix: Optional[str] = "/", logging_config: Optional[Union[Dict, LoggingConfig]] = None, ) -> DeploymentHandle: """Run an application and return a handle to its ingress deployment. @@ -446,6 +439,8 @@ def _run( if len(name) == 0: raise RayServeException("Application name must a non-empty string.") + validate_route_prefix(route_prefix) + client = _private_api.serve_start( http_options={"location": "EveryNode"}, ) @@ -455,7 +450,7 @@ def _run( if isinstance(target, Application): deployments = pipeline_build(target._get_internal_dag_node(), name) - ingress = get_and_validate_ingress_deployment(deployments) + ingress_deployment_name = deployments[-1].name else: msg = "`serve.run` expects an `Application` returned by `Deployment.bind()`." if isinstance(target, DAGNode): @@ -466,44 +461,33 @@ def _run( raise TypeError(msg) parameter_group = [] - for deployment in deployments: - # Overwrite route prefix - if route_prefix is not DEFAULT.VALUE and deployment._route_prefix is not None: - if route_prefix is not None and not route_prefix.startswith("/"): - raise ValueError( - "The route_prefix must start with a forward slash ('/')" - ) - - deployment._route_prefix = route_prefix + is_ingress = deployment._name == ingress_deployment_name if deployment.logging_config is None and logging_config: - if isinstance(logging_config, dict): - logging_config = LoggingConfig(**logging_config) - deployment.set_logging_config(logging_config.dict()) + deployment = deployment.options(logging_config=logging_config) + deployment_parameters = { "name": deployment._name, "replica_config": deployment._replica_config, "deployment_config": deployment._deployment_config, "version": deployment._version or get_random_string(), - "route_prefix": deployment.route_prefix, - "url": deployment.url, + "route_prefix": route_prefix if is_ingress else None, "docs_path": deployment._docs_path, - "ingress": deployment._name == ingress._name, + "ingress": is_ingress, } parameter_group.append(deployment_parameters) + client.deploy_application( name, parameter_group, _blocking=_blocking, ) - if ingress is not None: - # The deployment state is not guaranteed to be created after - # deploy_application returns; the application state manager will - # need another reconcile iteration to create it. - client._wait_for_deployment_created(ingress.name, name) - handle = client.get_handle(ingress.name, name, check_exists=False) - return handle + # The deployment state is not guaranteed to be created after + # deploy_application returns; the application state manager will + # need another reconcile iteration to create it. + client._wait_for_deployment_created(ingress_deployment_name, name) + return client.get_handle(ingress_deployment_name, name, check_exists=False) @PublicAPI(stability="stable") @@ -511,7 +495,7 @@ def run( target: Application, blocking: bool = False, name: str = SERVE_DEFAULT_APP_NAME, - route_prefix: Optional[str] = DEFAULT.VALUE, + route_prefix: Optional[str] = "/", logging_config: Optional[Union[Dict, LoggingConfig]] = None, ) -> DeploymentHandle: """Run an application and return a handle to its ingress deployment. diff --git a/python/ray/serve/deployment.py b/python/ray/serve/deployment.py index 54c34d878c31..e621cf88434d 100644 --- a/python/ray/serve/deployment.py +++ b/python/ray/serve/deployment.py @@ -15,7 +15,6 @@ from ray.serve._private.usage import ServeUsageTag from ray.serve._private.utils import DEFAULT, Default from ray.serve.config import AutoscalingConfig -from ray.serve.context import _get_global_client from ray.serve.schema import DeploymentSchema, LoggingConfig, RayActorOptionsSchema from ray.util.annotations import PublicAPI @@ -117,7 +116,6 @@ def __init__( deployment_config: DeploymentConfig, replica_config: ReplicaConfig, version: Optional[str] = None, - route_prefix: Union[str, None, DEFAULT] = DEFAULT.VALUE, _internal=False, ) -> None: if not _internal: @@ -129,18 +127,6 @@ def __init__( raise TypeError("name must be a string.") if not (version is None or isinstance(version, str)): raise TypeError("version must be a string.") - if route_prefix is not DEFAULT.VALUE and route_prefix is not None: - if not isinstance(route_prefix, str): - raise TypeError("route_prefix must be a string.") - if not route_prefix.startswith("/"): - raise ValueError("route_prefix must start with '/'.") - if route_prefix != "/" and route_prefix.endswith("/"): - raise ValueError( - "route_prefix must not end with '/' unless it's the root." - ) - if "{" in route_prefix or "}" in route_prefix: - raise ValueError("route_prefix may not contain wildcards.") - docs_path = None if ( inspect.isclass(replica_config.deployment_def) @@ -154,7 +140,6 @@ def __init__( self._version = version self._deployment_config = deployment_config self._replica_config = replica_config - self._route_prefix = route_prefix self._docs_path = docs_path @property @@ -192,11 +177,11 @@ def max_queued_requests(self) -> int: return self._deployment_config.max_queued_requests @property - def route_prefix(self) -> Optional[str]: - """HTTP route prefix that this deployment is exposed under.""" - if self._route_prefix is DEFAULT.VALUE: - return f"/{self._name}" - return self._route_prefix + def route_prefix(self): + raise ValueError( + "`route_prefix` can no longer be specified at the deployment level. " + "Pass it to `serve.run` or in the application config instead." + ) @property def ray_actor_options(self) -> Optional[Dict]: @@ -213,11 +198,11 @@ def init_kwargs(self) -> Tuple[Any]: @property def url(self) -> Optional[str]: - if self._route_prefix is None: - # this deployment is not exposed over HTTP - return None - - return _get_global_client().root_url + self.route_prefix + logger.warning( + "DeprecationWarning: `Deployment.url` is deprecated " + "and will be removed in the future." + ) + return None @property def logging_config(self) -> Dict: @@ -265,45 +250,6 @@ def bind(self, *args, **kwargs) -> Application: return Application._from_internal_dag_node(dag_node) - def _deploy(self, *init_args, _blocking=True, **init_kwargs): - """Deploy or update this deployment. - - Args: - init_args: args to pass to the class __init__ - method. Not valid if this deployment wraps a function. - init_kwargs: kwargs to pass to the class __init__ - method. Not valid if this deployment wraps a function. - """ - if len(init_args) == 0 and self._replica_config.init_args is not None: - init_args = self._replica_config.init_args - if len(init_kwargs) == 0 and self._replica_config.init_kwargs is not None: - init_kwargs = self._replica_config.init_kwargs - - replica_config = ReplicaConfig.create( - self._replica_config.deployment_def, - init_args=init_args, - init_kwargs=init_kwargs, - ray_actor_options=self._replica_config.ray_actor_options, - placement_group_bundles=self._replica_config.placement_group_bundles, - placement_group_strategy=self._replica_config.placement_group_strategy, - max_replicas_per_node=self._replica_config.max_replicas_per_node, - ) - - return _get_global_client().deploy( - self._name, - replica_config=replica_config, - deployment_config=self._deployment_config, - version=self._version, - route_prefix=self.route_prefix, - url=self.url, - _blocking=_blocking, - ) - - def _delete(self): - """Delete this deployment.""" - - return _get_global_client().delete_deployments([self._name]) - def options( self, func_or_class: Optional[Callable] = None, @@ -337,6 +283,11 @@ def options( Refer to the `@serve.deployment` decorator docs for available arguments. """ + if route_prefix is not DEFAULT.VALUE: + raise ValueError( + "`route_prefix` can no longer be specified at the deployment level. " + "Pass it to `serve.run` or in the application config instead." + ) # Modify max_ongoing_requests and autoscaling_config if # `num_replicas="auto"` @@ -390,13 +341,6 @@ def options( "future!" ) - if not _internal and route_prefix is not DEFAULT.VALUE: - logger.warning( - "DeprecationWarning: `route_prefix` in `@serve.deployment` has been " - "deprecated. To specify a route prefix for an application, pass it " - "into `serve.run` instead." - ) - elif num_replicas not in [DEFAULT.VALUE, None]: new_deployment_config.num_replicas = num_replicas @@ -424,10 +368,6 @@ def options( if _init_kwargs is DEFAULT.VALUE: _init_kwargs = self._replica_config.init_kwargs - if route_prefix is DEFAULT.VALUE: - # Default is to keep the previous value - route_prefix = self._route_prefix - if ray_actor_options is DEFAULT.VALUE: ray_actor_options = self._replica_config.ray_actor_options @@ -479,7 +419,6 @@ def options( new_deployment_config, new_replica_config, version=version, - route_prefix=route_prefix, _internal=True, ) @@ -491,35 +430,25 @@ def __eq__(self, other): self._deployment_config == other._deployment_config, self._replica_config.init_args == other._replica_config.init_args, self._replica_config.init_kwargs == other._replica_config.init_kwargs, - # compare route prefix with default value resolved - self.route_prefix == other.route_prefix, self._replica_config.ray_actor_options == self._replica_config.ray_actor_options, ] ) def __str__(self): - return ( - f"Deployment(name={self._name}," - f"version={self._version}," - f"route_prefix={self.route_prefix})" - ) + return f"Deployment(name={self._name})" def __repr__(self): return str(self) def deployment_to_schema( - d: Deployment, include_route_prefix: bool = True + d: Deployment, ) -> DeploymentSchema: """Converts a live deployment object to a corresponding structured schema. Args: d: Deployment object to convert - include_route_prefix: Whether to include the route_prefix in the returned - schema. This should be set to False if the schema will be included in a - higher-level object describing an application, and you want to place - route_prefix at the application level. """ if d.ray_actor_options is not None: @@ -547,9 +476,6 @@ def deployment_to_schema( "logging_config": d._deployment_config.logging_config, } - if include_route_prefix: - deployment_options["route_prefix"] = d.route_prefix - # Let non-user-configured options be set to defaults. If the schema # is converted back to a deployment, this lets Serve continue tracking # which options were set by the user. Name is a required field in the @@ -627,6 +553,5 @@ def schema_to_deployment(s: DeploymentSchema) -> Deployment: name=s.name, deployment_config=deployment_config, replica_config=replica_config, - route_prefix=s.route_prefix, _internal=True, ) diff --git a/python/ray/serve/schema.py b/python/ray/serve/schema.py index e04684cf3aa6..5556b341af36 100644 --- a/python/ray/serve/schema.py +++ b/python/ray/serve/schema.py @@ -278,13 +278,6 @@ class DeploymentSchema(BaseModel, allow_population_by_field_name=True): "(experimental)." ), ) - # route_prefix of None means the deployment is not exposed over HTTP. - route_prefix: Union[str, None] = Field( - default=DEFAULT.VALUE, - description=( - "[DEPRECATED] Please use route_prefix under ServeApplicationSchema instead." - ), - ) max_ongoing_requests: int = Field( default=DEFAULT.VALUE, description=( @@ -444,10 +437,6 @@ def validate_max_queued_requests(cls, values): return values - deployment_schema_route_prefix_format = validator("route_prefix", allow_reuse=True)( - _route_prefix_format - ) - def _get_user_configured_option_names(self) -> Set[str]: """Get set of names for all user-configured options. @@ -460,12 +449,7 @@ def _get_user_configured_option_names(self) -> Set[str]: def _deployment_info_to_schema(name: str, info: DeploymentInfo) -> DeploymentSchema: - """Converts a DeploymentInfo object to DeploymentSchema. - - Route_prefix will not be set in the returned DeploymentSchema, since starting in 2.x - route_prefix is an application-level concept. (This should only be used on the 2.x - codepath) - """ + """Converts a DeploymentInfo object to DeploymentSchema.""" schema = DeploymentSchema( name=name, @@ -957,21 +941,6 @@ class DeploymentDetails(BaseModel, extra=Extra.forbid, frozen=True): description="Details about the live replicas of this deployment." ) - @validator("deployment_config") - def deployment_route_prefix_not_set(cls, v: DeploymentSchema): - # Route prefix should not be set at the deployment level. Deployment-level route - # prefix is outdated, there should be one route prefix per application - if ( - "route_prefix" in v.__fields_set__ - ): # in Pydantic v2, this becomes `in v.model_fields_set` - raise ValueError( - "Unexpectedly found a deployment-level route_prefix in the " - f'deployment_config for deployment "{v.name}". The route_prefix in ' - "deployment_config within DeploymentDetails should not be set; please " - "set it at the application level." - ) - return v - @PublicAPI(stability="alpha") class APIType(str, Enum): diff --git a/python/ray/serve/scripts.py b/python/ray/serve/scripts.py index dbad99756546..270dc4a032b4 100644 --- a/python/ray/serve/scripts.py +++ b/python/ray/serve/scripts.py @@ -27,10 +27,6 @@ SERVE_NAMESPACE, ) from ray.serve._private.deployment_graph_build import build as pipeline_build -from ray.serve._private.deployment_graph_build import ( - get_and_validate_ingress_deployment, -) -from ray.serve._private.utils import DEFAULT from ray.serve.config import DeploymentMode, ProxyLocation, gRPCOptions from ray.serve.deployment import Application, deployment_to_schema from ray.serve.schema import ( @@ -438,6 +434,7 @@ def deploy( "--route-prefix", required=False, type=str, + default="/", help=( "Route prefix for the application. This should only be used " "when running an application specified by import path and " @@ -465,11 +462,9 @@ def run( address: str, blocking: bool, reload: bool, - route_prefix: Optional[str], + route_prefix: str, name: str, ): - if route_prefix is None: - route_prefix = DEFAULT.VALUE sys.path.insert(0, app_dir) args_dict = convert_args_to_dict(arguments) final_runtime_env = parse_runtime_env_args( @@ -800,15 +795,12 @@ def build_app_config(import_path: str, name: str = None): ) deployments = pipeline_build(app, name) - ingress = get_and_validate_ingress_deployment(deployments) schema = ServeApplicationSchema( name=name, - route_prefix=ingress.route_prefix, + route_prefix="/" if len(import_paths) == 1 else f"/{name}", import_path=import_path, runtime_env={}, - deployments=[ - deployment_to_schema(d, include_route_prefix=False) for d in deployments - ], + deployments=[deployment_to_schema(d) for d in deployments], ) return schema.dict(exclude_unset=True) diff --git a/python/ray/serve/tests/BUILD b/python/ray/serve/tests/BUILD index 8dc26704eb75..e666ead5ec62 100644 --- a/python/ray/serve/tests/BUILD +++ b/python/ray/serve/tests/BUILD @@ -24,8 +24,6 @@ py_test_module_list( "test_constructor_failure.py", "test_controller.py", "test_deployment_graph_handle_serde.py", - "test_deployment_graph_ingress.py", - "test_deployment_node.py", "test_deployment_scheduler.py", "test_deployment_version.py", "test_expected_versions.py", diff --git a/python/ray/serve/tests/test_api.py b/python/ray/serve/tests/test_api.py index 6b151b7c2aa0..36b87f8955f3 100644 --- a/python/ray/serve/tests/test_api.py +++ b/python/ray/serve/tests/test_api.py @@ -349,7 +349,7 @@ def f(): def g(): return "got g" - @serve.deployment(route_prefix="/my_prefix") + @serve.deployment def h(): return "got h" @@ -360,7 +360,7 @@ def __call__(self, *args): app = FastAPI() - @serve.deployment(route_prefix="/hello") + @serve.deployment @serve.ingress(app) class MyFastAPIDeployment: @app.get("/") @@ -379,12 +379,12 @@ def root(self): # Test function deployment with app name and route_prefix set in deployment # decorator - h_handle = serve.run(h.bind(), name="app_h") + h_handle = serve.run(h.bind(), name="app_h", route_prefix="/my_prefix") assert h_handle.remote().result() == "got h" assert requests.get("http://127.0.0.1:8000/my_prefix").text == "got h" # Test FastAPI - serve.run(MyFastAPIDeployment.bind(), name="FastAPI") + serve.run(MyFastAPIDeployment.bind(), name="FastAPI", route_prefix="/hello") assert requests.get("http://127.0.0.1:8000/hello").text == '"Hello, world!"' @@ -551,55 +551,6 @@ def __call__(self): assert requests.get("http://127.0.0.1:8000/").text == "got model" -@pytest.mark.parametrize( - "ingress_route,app_route", - [ - ("/hello", "/"), - ("/hello", "/override"), - ("/", "/override"), - (None, "/override"), - ("/hello", None), - (None, None), - ], -) -def test_application_route_prefix_override(serve_instance, ingress_route, app_route): - """ - Set route prefix in serve.run to a non-None value, check it overrides correctly. - """ - - @serve.deployment - def f(): - return "hello" - - node = f.options(route_prefix=ingress_route).bind() - serve.run(node, route_prefix=app_route) - if app_route is None: - routes = requests.get("http://localhost:8000/-/routes").json() - assert len(routes) == 0 - else: - assert requests.get(f"http://localhost:8000{app_route}").text == "hello" - - -@pytest.mark.parametrize("ingress_route", ["/hello", "/"]) -def test_application_route_prefix_override1(serve_instance, ingress_route): - """ - Don't set route prefix in serve.run, check it always uses the ingress deployment - route. - """ - - @serve.deployment - def f(): - return "hello" - - node = f.options(route_prefix=ingress_route).bind() - serve.run(node) - if ingress_route is None: - routes = requests.get("http://localhost:8000/-/routes").json() - assert len(routes) == 0 - else: - assert requests.get(f"http://localhost:8000{ingress_route}").text == "hello" - - class TestAppBuilder: @serve.deployment class A: @@ -807,7 +758,11 @@ def f(): pass with pytest.raises( - ValueError, match=r"The route_prefix must start with a forward slash \('/'\)" + ValueError, + match=( + r"Invalid route_prefix 'no_slash', " + "must start with a forward slash \('/'\)" + ), ): serve.run(f.bind(), route_prefix="no_slash") diff --git a/python/ray/serve/tests/test_cli_2.py b/python/ray/serve/tests/test_cli_2.py index 3d45ea2ea31a..51e5a75124f6 100644 --- a/python/ray/serve/tests/test_cli_2.py +++ b/python/ray/serve/tests/test_cli_2.py @@ -529,8 +529,8 @@ async def __call__(self): TestBuildDagNode = NoArgDriver.bind(TestBuildFNode) -TestApp1Node = global_f.options(route_prefix="/app1").bind() -TestApp2Node = NoArgDriver.options(route_prefix="/app2").bind(global_f.bind()) +TestApp1Node = global_f.options(name="app1").bind() +TestApp2Node = NoArgDriver.options(name="app2").bind(global_f.bind()) @pytest.mark.skipif(sys.platform == "win32", reason="File path incorrect on Windows.") @@ -948,39 +948,6 @@ def test_grpc_proxy_model_composition(ray_start_stop): ping_fruit_stand(channel, app) -@serve.deployment(route_prefix="/foo") -async def deployment_with_route_prefix(args): - return "bar..." - - -route_prefix_app = deployment_with_route_prefix.bind() - - -@pytest.mark.skipif(sys.platform == "win32", reason="File path incorrect on Windows.") -def test_serve_run_mount_to_correct_deployment_route_prefix(ray_start_stop): - """Test running serve run with deployment with route_prefix should mount the - deployment to the correct route.""" - - import_path = "ray.serve.tests.test_cli_2.route_prefix_app" - subprocess.Popen(["serve", "run", import_path]) - - # /-/routes should show the app having the correct route. - wait_for_condition( - lambda: requests.get("http://localhost:8000/-/routes").text - == '{"/foo":"default"}' - ) - - # Ping root path directly should 404. - wait_for_condition( - lambda: requests.get("http://localhost:8000/").status_code == 404 - ) - - # Ping the mounting route should return 200. - wait_for_condition( - lambda: requests.get("http://localhost:8000/foo").status_code == 200 - ) - - @pytest.mark.skipif(sys.platform == "win32", reason="File path incorrect on Windows.") def test_control_c_shutdown_serve_components(ray_start_stop): """Test ctrl+c after `serve run` shuts down serve components.""" diff --git a/python/ray/serve/tests/test_deploy.py b/python/ray/serve/tests/test_deploy.py index 7cde6f9f73a9..9b8a92551fbe 100644 --- a/python/ray/serve/tests/test_deploy.py +++ b/python/ray/serve/tests/test_deploy.py @@ -610,7 +610,6 @@ class DClass: num_replicas=2, user_config="hi", max_ongoing_requests=100, - route_prefix="/hello", ray_actor_options={"num_cpus": 2}, )(DClass) @@ -619,15 +618,12 @@ class DClass: assert D.num_replicas == 2 assert D.user_config == "hi" assert D.max_ongoing_requests == 100 - assert D.route_prefix == "/hello" assert D.ray_actor_options == {"num_cpus": 2} D = serve.deployment( version=None, - route_prefix=None, )(DClass) assert D.version is None - assert D.route_prefix is None if __name__ == "__main__": diff --git a/python/ray/serve/tests/test_deploy_2.py b/python/ray/serve/tests/test_deploy_2.py index fd5cb4f59eb6..ba09f37ff70a 100644 --- a/python/ray/serve/tests/test_deploy_2.py +++ b/python/ray/serve/tests/test_deploy_2.py @@ -21,25 +21,6 @@ from ray.util.state import list_actors -@pytest.mark.parametrize("prefixes", [[None, "/f", None], ["/f", None, "/f"]]) -def test_deploy_nullify_route_prefix(serve_instance, prefixes): - # With multi dags support, dag driver will receive all route - # prefix when route_prefix is "None", since "None" will be converted - # to "/" internally. - # Note: the expose http endpoint will still be removed for internal - # dag node by setting "None" to route_prefix - @serve.deployment - def f(*args): - return "got me" - - for prefix in prefixes: - dag = f.options(route_prefix=prefix).bind() - handle = serve.run(dag) - assert requests.get("http://localhost:8000/f").status_code == 200 - assert requests.get("http://localhost:8000/f").text == "got me" - assert handle.remote().result() == "got me" - - @pytest.mark.timeout(10, method="thread") def test_deploy_empty_bundle(serve_instance): @serve.deployment(ray_actor_options={"num_cpus": 0}) diff --git a/python/ray/serve/tests/test_deploy_app.py b/python/ray/serve/tests/test_deploy_app.py index 0bea9ea6d25b..f1a3548faf30 100644 --- a/python/ray/serve/tests/test_deploy_app.py +++ b/python/ray/serve/tests/test_deploy_app.py @@ -1115,53 +1115,6 @@ def test_get_app_handle(client: ServeControllerClient): assert handle_2.route.remote("ADD", 2).result() == "5 pizzas please!" -@pytest.mark.parametrize("heavyweight", [True, False]) -def test_deploy_lightweight_multiple_route_prefix( - client: ServeControllerClient, heavyweight: bool -): - """If user deploys a config that sets route prefix for a non-ingress deployment, - the deploy should fail. - """ - - config = { - "applications": [ - { - "name": "default", - "import_path": "ray.serve.tests.test_config_files.world.DagNode", - } - ] - } - client.deploy_apps(ServeDeploySchema(**config)) - - def check(): - assert requests.post("http://localhost:8000/").text == "wonderful world" - return True - - wait_for_condition(check) - - # Add route prefix for non-ingress deployment - config["applications"][0]["deployments"] = [{"name": "f", "route_prefix": "/"}] - if heavyweight: - # Trigger re-build of the application - config["applications"][0]["runtime_env"] = {"env_vars": {"test": "3"}} - client.deploy_apps(ServeDeploySchema(**config)) - - def check_failed(): - s = serve.status().applications["default"] - assert s.status == ApplicationStatus.DEPLOY_FAILED - assert "Found multiple route prefixes" in s.message - return True - - wait_for_condition(check_failed) - - # Check 10 more times to make sure the status doesn't oscillate - for _ in range(10): - s = serve.status().applications["default"] - assert s.status == ApplicationStatus.DEPLOY_FAILED - assert "Found multiple route prefixes" in s.message - time.sleep(0.1) - - @pytest.mark.parametrize("rebuild", [True, False]) def test_redeploy_old_config_after_failed_deployment( client: ServeControllerClient, rebuild @@ -1197,9 +1150,9 @@ def check_application_running(): ] = "ray.serve.tests.test_config_files.import_error.app" err_msg = "ZeroDivisionError" else: - # Trying to add a route prefix for non-ingress deployment will fail - new_app_config["deployments"] = [{"name": "f", "route_prefix": "/"}] - err_msg = "Found multiple route prefixes" + # Set config for a nonexistent deployment + new_app_config["deployments"] = [{"name": "nonexistent", "num_replicas": 1}] + err_msg = "nonexistent deployment 'nonexistent'" client.deploy_apps(ServeDeploySchema(**{"applications": [new_app_config]})) def check_deploy_failed(message): diff --git a/python/ray/serve/tests/test_deployment_graph_ingress.py b/python/ray/serve/tests/test_deployment_graph_ingress.py deleted file mode 100644 index b98b6edab19d..000000000000 --- a/python/ray/serve/tests/test_deployment_graph_ingress.py +++ /dev/null @@ -1,116 +0,0 @@ -import sys - -import pytest - -from ray import serve -from ray.serve._private.deployment_graph_build import build as pipeline_build -from ray.serve._private.deployment_graph_build import ( - get_and_validate_ingress_deployment, -) - - -@serve.deployment -class Model: - def __init__(self, val): - self.val = val - - def forward(self, input): - return self.val + input - - -@serve.deployment -def func_deployment(): - return "hello" - - -@serve.deployment -def combine(input_1, input_2): - return input_1 + input_2 - - -@serve.deployment -class Driver: - def __init__(self, dag): - self.dag = dag - - async def __call__(self, inp): - return await self.dag.remote(inp) - - -def test_http_user_bring_own_driver_route_prefix(serve_instance): - m1 = Model.bind(1) - serve_dag = Driver.options(route_prefix="/hello").bind(m1) - - deployments = pipeline_build(serve_dag) - ingress_deployment = get_and_validate_ingress_deployment(deployments) - assert ingress_deployment.route_prefix == "/hello" - for deployment in deployments[:-1]: - assert deployment.route_prefix is None - - -def test_http_no_non_ingress_deployment_route_prefix(serve_instance): - m1 = Model.options(route_prefix="/should-fail").bind(1) - serve_dag = Driver.bind(m1) - - with pytest.raises( - ValueError, - match="Route prefix is only configurable on the ingress deployment", - ): - _ = pipeline_build(serve_dag) - - -def test_http_we_provide_default_route_prefix_cls(serve_instance): - """Ensure the default ingress deployment route is '/' instead of driver - class name - """ - - m1 = Model.bind(1) - serve_dag = Driver.bind(m1) - - deployments = pipeline_build(serve_dag) - ingress_deployment = get_and_validate_ingress_deployment(deployments) - assert ingress_deployment.route_prefix == "/" - for deployment in deployments[:-1]: - assert deployment.route_prefix is None - - -def test_http_we_provide_default_route_prefix_func(serve_instance): - """Ensure the default ingress deployment route is '/' instead of driver - function name - """ - func_dag = func_deployment.bind() - deployments = pipeline_build(func_dag) - ingress_deployment = get_and_validate_ingress_deployment(deployments) - assert ingress_deployment.route_prefix == "/" - - -def test_http_non_default_route_prefix_on_non_root_node(serve_instance): - m1 = Model.bind(1) - serve_dag = Driver.bind(m1) - - deployments = pipeline_build(serve_dag) - non_root_deployment = deployments[0].options(route_prefix="/") - deployments[0] = non_root_deployment - - with pytest.raises( - ValueError, - match=( - "Only one deployment in an Serve Application or DAG can have " - "non-None route prefix" - ), - ): - _ = get_and_validate_ingress_deployment(deployments) - - -def test_http_reconfigure_non_default_route_prefix_on_root(serve_instance): - m1 = Model.bind(1) - serve_dag = Driver.bind(m1) - - deployments = pipeline_build(serve_dag) - non_root_deployment = deployments[-1].options(route_prefix="/yoo") - deployments[-1] = non_root_deployment - _ = get_and_validate_ingress_deployment(deployments) - - -if __name__ == "__main__": - sys.exit(pytest.main(["-v", "-s", __file__])) diff --git a/python/ray/serve/tests/test_deployment_node.py b/python/ray/serve/tests/test_deployment_node.py deleted file mode 100644 index dbd17ed24bb2..000000000000 --- a/python/ray/serve/tests/test_deployment_node.py +++ /dev/null @@ -1,82 +0,0 @@ -import pytest - -import ray -from ray import serve -from ray.serve._private.deployment_node import DeploymentNode - - -@serve.deployment -class ServeActor: - def __init__(self, init_value=0): - self.i = init_value - - def inc(self): - self.i += 1 - - def get(self): - return self.i - - -class SyncActor: - def __init__(self, init_value=0): - self.i = init_value - - def inc(self): - self.i += 1 - - def get(self): - return self.i - - -class Actor: - def __init__(self, init_value=0): - self.i = init_value - - async def inc(self): - self.i += 1 - - async def get(self): - return self.i - - -@pytest.mark.skip(reason="async handle not enabled yet") -async def test_simple_deployment_async(serve_instance): - """Internal testing only for simple creation and execution. - - User should NOT directly create instances of Deployment or DeploymentNode. - """ - node = DeploymentNode( - Actor, - "test", - (10,), - {}, - {}, - ) - node._deployment._deploy() - handle = node._deployment_handle - - assert ray.get(await node.get.execute()) == 10 - ray.get(await node.inc.execute()) - assert ray.get(await node.get.execute()) == 11 - assert ray.get(await node.get.execute()) == ray.get(await handle.get.remote()) - - -def test_mix_sync_async_handle(serve_instance): - # TODO: (jiaodong) Add complex multi-deployment tests from ray DAG. - pass - - -def test_deployment_node_as_init_args(serve_instance): - # TODO: (jiaodong) Add complex multi-deployment tests from ray DAG. - pass - - -def test_multi_deployment_nodes(serve_instance): - # TODO: (jiaodong) Add complex multi-deployment tests from ray DAG. - pass - - -if __name__ == "__main__": - import sys - - sys.exit(pytest.main(["-v", __file__])) diff --git a/python/ray/serve/tests/test_fastapi.py b/python/ray/serve/tests/test_fastapi.py index 57ec2f20eeea..6c4e0b7ab4c8 100644 --- a/python/ray/serve/tests/test_fastapi.py +++ b/python/ray/serve/tests/test_fastapi.py @@ -30,7 +30,6 @@ from ray.serve._private.client import ServeControllerClient from ray.serve._private.constants import SERVE_DEFAULT_APP_NAME from ray.serve._private.http_util import make_fastapi_class_based_view -from ray.serve._private.utils import DEFAULT from ray.serve.exceptions import RayServeException from ray.serve.handle import DeploymentHandle @@ -440,7 +439,7 @@ class MyApp: @pytest.mark.parametrize( "input_route_prefix,expected_route_prefix", - [(DEFAULT.VALUE, "/"), ("/", "/"), ("/subpath", "/subpath/")], + [("/", "/"), ("/subpath", "/subpath/")], ) def test_doc_generation(serve_instance, input_route_prefix, expected_route_prefix): app = FastAPI() diff --git a/python/ray/serve/tests/test_http_routes.py b/python/ray/serve/tests/test_http_routes.py index 602b9ef006e0..4d7cf3e153a7 100644 --- a/python/ray/serve/tests/test_http_routes.py +++ b/python/ray/serve/tests/test_http_routes.py @@ -11,38 +11,45 @@ def test_path_validation(serve_instance): + @serve.deployment + class D: + pass + # Path prefix must start with /. with pytest.raises(ValueError): - - @serve.deployment(route_prefix="hello") - class D1: - pass + serve.run(D.bind(), route_prefix="hello") # Path prefix must not end with / unless it's the root. with pytest.raises(ValueError): - - @serve.deployment(route_prefix="/hello/") - class D2: - pass + serve.run(D.bind(), route_prefix="/hello/") # Wildcards not allowed with new ingress support. with pytest.raises(ValueError): + serve.run(D.bind(), route_prefix="/{hello}") - @serve.deployment(route_prefix="/{hello}") - class D3: - pass - @serve.deployment(route_prefix="/duplicate") - class D4: - pass +def test_routes_healthz(serve_instance): + # Should return 503 until there are any routes populated. + resp = requests.get("http://localhost:8000/-/healthz") + assert resp.status_code == 503 + assert resp.text == "Route table is not populated yet." - serve.run(D4.bind()) + @serve.deployment + class D1: + def __call__(self, *args): + return "hi" + # D1 not exposed over HTTP so should still return 503. + serve.run(D1.bind(), route_prefix=None) + resp = requests.get("http://localhost:8000/-/healthz") + assert resp.status_code == 503 + assert resp.text == "Route table is not populated yet." -def test_routes_healthz(serve_instance): + # D1 exposed over HTTP, should return 200 OK. + serve.run(D1.bind(), route_prefix="/") resp = requests.get("http://localhost:8000/-/healthz") assert resp.status_code == 200 - assert resp.content == b"success" + assert resp.text == "success" def test_routes_endpoint(serve_instance): @@ -72,7 +79,7 @@ def __call__(self, *args): def test_deployment_without_route(serve_instance): - @serve.deployment(route_prefix=None) + @serve.deployment class D: def __call__(self, *args): return "1" @@ -131,7 +138,7 @@ def __call__(self, *args): check_req("/", text="2") check_req("/a", text="2") - @serve.deployment(route_prefix="/hello/world") + @serve.deployment class D3: def __call__(self, *args): return "3" diff --git a/python/ray/serve/tests/test_standalone.py b/python/ray/serve/tests/test_standalone.py index 0936432dc3aa..bb76cb36c31f 100644 --- a/python/ray/serve/tests/test_standalone.py +++ b/python/ray/serve/tests/test_standalone.py @@ -27,7 +27,6 @@ SERVE_DEFAULT_APP_NAME, SERVE_NAMESPACE, SERVE_PROXY_NAME, - SERVE_ROOT_URL_ENV_KEY, ) from ray.serve._private.default_impl import create_cluster_node_info_cache from ray.serve._private.http_util import set_socket_reuse_port @@ -418,40 +417,6 @@ class Dummy: assert resp.headers["access-control-allow-origin"] == "*" -@pytest.mark.skipif(sys.platform == "win32", reason="Failing on Windows") -def test_http_root_url(ray_shutdown): - @serve.deployment - def f(_): - pass - - root_url = "https://my.domain.dev/prefix" - - port = new_port() - os.environ[SERVE_ROOT_URL_ENV_KEY] = root_url - serve.start(http_options=dict(port=port)) - serve.run(f.bind()) - assert f.url == root_url + "/f" - serve.shutdown() - ray.shutdown() - del os.environ[SERVE_ROOT_URL_ENV_KEY] - - port = new_port() - serve.start(http_options=dict(port=port)) - serve.run(f.bind()) - assert f.url != root_url + "/f" - assert f.url == f"http://127.0.0.1:{port}/f" - serve.shutdown() - ray.shutdown() - - ray.init(runtime_env={"env_vars": {SERVE_ROOT_URL_ENV_KEY: root_url}}) - port = new_port() - serve.start(http_options=dict(port=port)) - serve.run(f.bind()) - assert f.url == root_url + "/f" - serve.shutdown() - ray.shutdown() - - @pytest.mark.skipif(sys.platform == "win32", reason="Failing on Windows") def test_http_root_path(ray_shutdown): @serve.deployment @@ -463,11 +428,8 @@ def hello(): serve.start(http_options=dict(root_path=root_path, port=port)) serve.run(hello.bind(), route_prefix="/hello") - # check whether url is prefixed correctly - assert hello.url == f"http://127.0.0.1:{port}{root_path}/hello" - # check routing works as expected - resp = requests.get(hello.url) + resp = requests.get(f"http://127.0.0.1:{port}{root_path}/hello") assert resp.status_code == 200 assert resp.text == "hello" @@ -536,7 +498,7 @@ def test_http_head_only(ray_cluster): cpu_per_nodes = { r["CPU"] for r in ray._private.state.available_resources_per_node().values() } - assert cpu_per_nodes == {4, 4} + assert cpu_per_nodes == {4} def test_serve_shutdown(ray_shutdown): diff --git a/python/ray/serve/tests/test_websockets.py b/python/ray/serve/tests/test_websockets.py index 05a2da556a81..c4d7cca7b262 100644 --- a/python/ray/serve/tests/test_websockets.py +++ b/python/ray/serve/tests/test_websockets.py @@ -28,10 +28,7 @@ async def ws_handler(self, ws: WebSocket): bytes = await ws.receive_bytes() await ws.send_bytes(bytes) - if route_prefix is not None: - WebSocketServer = WebSocketServer.options(route_prefix=route_prefix) - - serve.run(WebSocketServer.bind()) + serve.run(WebSocketServer.bind(), route_prefix=route_prefix or "/") msg = "Hello world!" if route_prefix: diff --git a/python/ray/serve/tests/unit/test_application_state.py b/python/ray/serve/tests/unit/test_application_state.py index b0c96f278757..108ce319a9e4 100644 --- a/python/ray/serve/tests/unit/test_application_state.py +++ b/python/ray/serve/tests/unit/test_application_state.py @@ -1141,19 +1141,7 @@ def test_override_autoscaling_config(self, info): assert updated_info.deployment_config.autoscaling_config.initial_replicas == 12 assert updated_info.deployment_config.autoscaling_config.max_replicas == 79 - def test_override_route_prefix_1(self, info): - config = ServeApplicationSchema( - name="default", - import_path="test.import.path", - deployments=[DeploymentSchema(name="A", route_prefix="/alice")], - ) - - updated_infos = override_deployment_info({"A": info}, config) - updated_info = updated_infos["A"] - assert updated_info.route_prefix == "/alice" - assert updated_info.version == "123" - - def test_override_route_prefix_2(self, info): + def test_override_route_prefix(self, info): config = ServeApplicationSchema( name="default", import_path="test.import.path", @@ -1170,19 +1158,6 @@ def test_override_route_prefix_2(self, info): assert updated_info.route_prefix == "/bob" assert updated_info.version == "123" - def test_override_route_prefix_3(self, info): - config = ServeApplicationSchema( - name="default", - import_path="test.import.path", - route_prefix="/bob", - deployments=[DeploymentSchema(name="A", route_prefix="/alice")], - ) - - updated_infos = override_deployment_info({"A": info}, config) - updated_info = updated_infos["A"] - assert updated_info.route_prefix == "/bob" - assert updated_info.version == "123" - def test_override_ray_actor_options_1(self, info): """Test runtime env specified in config at deployment level.""" config = ServeApplicationSchema( diff --git a/python/ray/serve/tests/unit/test_deployment_class.py b/python/ray/serve/tests/unit/test_deployment_class.py index 54dd23d0b9f2..75743f6184ac 100644 --- a/python/ray/serve/tests/unit/test_deployment_class.py +++ b/python/ray/serve/tests/unit/test_deployment_class.py @@ -81,7 +81,6 @@ class TestDeploymentOptions: "name": "test", "version": "abcd", "num_replicas": 1, - "route_prefix": "/", "ray_actor_options": {}, "user_config": {}, "max_ongoing_requests": 10, @@ -178,7 +177,6 @@ def f(): "option", [ "num_replicas", - "route_prefix", "autoscaling_config", "user_config", ], diff --git a/python/ray/serve/tests/unit/test_schema.py b/python/ray/serve/tests/unit/test_schema.py index 1959cf20bb98..c7bc18788c7d 100644 --- a/python/ray/serve/tests/unit/test_schema.py +++ b/python/ray/serve/tests/unit/test_schema.py @@ -288,39 +288,6 @@ def test_validate_max_queued_requests(self): with pytest.raises(ValidationError): DeploymentSchema.parse_obj(deployment_schema) - def test_route_prefix(self): - # Ensure that route_prefix is validated - - deployment_schema = self.get_minimal_deployment_schema() - - # route_prefix must start with a "/" - deployment_schema["route_prefix"] = "hello/world" - with pytest.raises(ValueError): - DeploymentSchema.parse_obj(deployment_schema) - - # route_prefix must end with a "/" - deployment_schema["route_prefix"] = "/hello/world/" - with pytest.raises(ValueError): - DeploymentSchema.parse_obj(deployment_schema) - - # route_prefix cannot contain wildcards, meaning it can't have - # "{" or "}" - deployment_schema["route_prefix"] = "/hello/{adjective}/world/" - with pytest.raises(ValueError): - DeploymentSchema.parse_obj(deployment_schema) - - # Ensure a valid route_prefix works - deployment_schema["route_prefix"] = "/hello/wonderful/world" - DeploymentSchema.parse_obj(deployment_schema) - - # Ensure route_prefix of "/" works - deployment_schema["route_prefix"] = "/" - DeploymentSchema.parse_obj(deployment_schema) - - # Ensure route_prefix of None works - deployment_schema["route_prefix"] = None - DeploymentSchema.parse_obj(deployment_schema) - def test_mutually_exclusive_num_replicas_and_autoscaling_config(self): # num_replicas and autoscaling_config cannot be set at the same time deployment_schema = self.get_minimal_deployment_schema() @@ -757,7 +724,6 @@ def global_f(): def test_deployment_to_schema_to_deployment(): @serve.deployment( num_replicas=3, - route_prefix="/hello", ray_actor_options={ "runtime_env": { "working_dir": TEST_MODULE_PINNED_URI, @@ -778,7 +744,6 @@ def f(): ) assert deployment.num_replicas == 3 - assert deployment.route_prefix == "/hello" assert ( deployment.ray_actor_options["runtime_env"]["working_dir"] == TEST_MODULE_PINNED_URI @@ -794,7 +759,6 @@ def test_unset_fields_schema_to_deployment_ray_actor_options(): @serve.deployment( num_replicas=3, - route_prefix="/hello", ray_actor_options={}, ) def f(): diff --git a/release/golden_notebook_tests/workloads/torch_tune_serve_test.py b/release/golden_notebook_tests/workloads/torch_tune_serve_test.py index bace953e5ba6..f6add0c302df 100644 --- a/release/golden_notebook_tests/workloads/torch_tune_serve_test.py +++ b/release/golden_notebook_tests/workloads/torch_tune_serve_test.py @@ -151,7 +151,7 @@ def get_model(checkpoint_dir: str): return model -@serve.deployment(name="mnist", route_prefix="/mnist") +@serve.deployment(name="mnist") class MnistDeployment: def __init__(self, model): use_cuda = torch.cuda.is_available() @@ -184,7 +184,8 @@ def setup_serve(model, use_gpu: bool = False): ray_actor_options={"num_gpus": 1, "resources": {"worker": 1}} if use_gpu else {}, - ).bind(model) + ).bind(model), + route_prefix="/mnist", )