diff --git a/xinference/deploy/cmdline.py b/xinference/deploy/cmdline.py
index 6b67c67c61..d07ab46692 100644
--- a/xinference/deploy/cmdline.py
+++ b/xinference/deploy/cmdline.py
@@ -84,12 +84,39 @@ def get_endpoint(endpoint: Optional[str]) -> str:
         return endpoint
 
 
-@click.group(invoke_without_command=True, name="xinference")
+@click.group(
+    invoke_without_command=True,
+    name="xinference",
+    help="Xinference command-line interface for serving and deploying models.",
+)
 @click.pass_context
-@click.version_option(__version__, "--version", "-v")
-@click.option("--log-level", default="INFO", type=str)
-@click.option("--host", "-H", default=XINFERENCE_DEFAULT_LOCAL_HOST, type=str)
-@click.option("--port", "-p", default=XINFERENCE_DEFAULT_ENDPOINT_PORT, type=int)
+@click.version_option(
+    __version__,
+    "--version",
+    "-v",
+    help="Show the current version of the Xinference tool.",
+)
+@click.option(
+    "--log-level",
+    default="INFO",
+    type=str,
+    help="""Set the logger level. Options, listed from most verbose to least verbose, are:
+              DEBUG > INFO > WARNING > ERROR > CRITICAL (Default level is INFO)""",
+)
+@click.option(
+    "--host",
+    "-H",
+    default=XINFERENCE_DEFAULT_LOCAL_HOST,
+    type=str,
+    help="Specify the host address for the Xinference server.",
+)
+@click.option(
+    "--port",
+    "-p",
+    default=XINFERENCE_DEFAULT_ENDPOINT_PORT,
+    type=int,
+    help="Specify the port number for the Xinference server.",
+)
 def cli(
     ctx,
     log_level: str,
@@ -114,10 +141,30 @@ def cli(
         )
 
 
-@click.command()
-@click.option("--log-level", default="INFO", type=str)
-@click.option("--host", "-H", default=XINFERENCE_DEFAULT_DISTRIBUTED_HOST, type=str)
-@click.option("--port", "-p", default=XINFERENCE_DEFAULT_ENDPOINT_PORT, type=int)
+@click.command(
+    help="Starts a Xinference supervisor to control and monitor the worker actors."
+)
+@click.option(
+    "--log-level",
+    default="INFO",
+    type=str,
+    help="""Set the logger level for the supervisor. Options, listed from most verbose to least verbose, are:
+              DEBUG > INFO > WARNING > ERROR > CRITICAL (Default level is INFO)""",
+)
+@click.option(
+    "--host",
+    "-H",
+    default=XINFERENCE_DEFAULT_DISTRIBUTED_HOST,
+    type=str,
+    help="Specify the host address for the supervisor.",
+)
+@click.option(
+    "--port",
+    "-p",
+    default=XINFERENCE_DEFAULT_ENDPOINT_PORT,
+    type=int,
+    help="Specify the port number for the supervisor.",
+)
 def supervisor(
     log_level: str,
     host: str,
@@ -134,14 +181,26 @@ def supervisor(
     main(address=address, host=host, port=port, logging_conf=logging_conf)
 
 
-@click.command()
-@click.option("--log-level", default="INFO", type=str)
+@click.command(
+    help="Starts a Xinference worker to execute tasks assigned by the supervisor in a distributed setup."
+)
 @click.option(
-    "--endpoint",
-    "-e",
+    "--log-level",
+    default="INFO",
+    type=str,
+    help="""Set the logger level for the worker. Options, listed from most verbose to least verbose, are:
+              DEBUG > INFO > WARNING > ERROR > CRITICAL (Default level is INFO)""",
+)
+@click.option(
+    "--endpoint", "-e", type=str, help="Specify the endpoint URL for the worker."
+)
+@click.option(
+    "--host",
+    "-H",
+    default=XINFERENCE_DEFAULT_DISTRIBUTED_HOST,
     type=str,
+    help="Specify the host address for the worker.",
 )
-@click.option("--host", "-H", default=XINFERENCE_DEFAULT_DISTRIBUTED_HOST, type=str)
 def worker(log_level: str, endpoint: Optional[str], host: str):
     from ..deploy.worker import main
 
@@ -163,15 +222,24 @@ def worker(log_level: str, endpoint: Optional[str], host: str):
     )
 
 
-@cli.command("register")
+@cli.command("register", help="Registers a new model with Xinference for deployment.")
 @click.option(
-    "--endpoint",
-    "-e",
+    "--endpoint", "-e", type=str, help="Endpoint URL for registering the model."
+)
+@click.option(
+    "--model-type",
+    "-t",
+    default="LLM",
     type=str,
+    help="Type of model to register (default is 'LLM').",
+)
+@click.option("--file", "-f", type=str, help="Path to the file containing the model.")
+@click.option(
+    "--persist",
+    "-p",
+    is_flag=True,
+    help="Persist the model to the filesystem so that it is retained after server restarts.",
 )
-@click.option("--model-type", "-t", default="LLM", type=str)
-@click.option("--file", "-f", type=str)
-@click.option("--persist", "-p", is_flag=True)
 def register_model(
     endpoint: Optional[str],
     model_type: str,
@@ -190,14 +258,21 @@ def register_model(
     )
 
 
-@cli.command("unregister")
+@cli.command(
+    "unregister",
+    help="Unregisters a model from Xinference, removing it from deployment.",
+)
 @click.option(
-    "--endpoint",
-    "-e",
+    "--endpoint", "-e", type=str, help="Endpoint URL for unregistering the model."
+)
+@click.option(
+    "--model-type",
+    "-t",
+    default="LLM",
     type=str,
+    help="Type of model to unregister (default is 'LLM').",
 )
-@click.option("--model-type", "-t", default="LLM", type=str)
-@click.option("--model-name", "-n", type=str)
+@click.option("--model-name", "-n", type=str, help="Name of the model to unregister.")
 def unregister_model(
     endpoint: Optional[str],
     model_type: str,
@@ -212,13 +287,20 @@ def unregister_model(
     )
 
 
-@cli.command("registrations")
+@cli.command("registrations", help="Lists all registered models in Xinference.")
 @click.option(
     "--endpoint",
     "-e",
     type=str,
+    help="Endpoint URL to retrieve the list of registrations.",
+)
+@click.option(
+    "--model-type",
+    "-t",
+    default="LLM",
+    type=str,
+    help="Filter by model type (default is 'LLM').",
 )
-@click.option("--model-type", "-t", default="LLM", type=str)
 def list_model_registrations(
     endpoint: Optional[str],
     model_type: str,
@@ -249,16 +331,44 @@ def list_model_registrations(
     )
 
 
-@cli.command("launch")
+@cli.command(
+    "launch",
+    help="Launch a model with the Xinference framework with the given parameters.",
+)
 @click.option(
     "--endpoint",
     "-e",
     type=str,
+    help="Specify the endpoint URL for launching the model.",
+)
+@click.option(
+    "--model-name",
+    "-n",
+    type=str,
+    required=True,
+    help="Provide the name of the model to be launched.",
+)
+@click.option(
+    "--size-in-billions",
+    "-s",
+    default=None,
+    type=int,
+    help="Specify the model size in billions of parameters.",
+)
+@click.option(
+    "--model-format",
+    "-f",
+    default=None,
+    type=str,
+    help="Specify the format of the model, e.g. pytorch, ggmlv3, etc.",
+)
+@click.option(
+    "--quantization",
+    "-q",
+    default=None,
+    type=str,
+    help="Define the quantization settings for the model.",
 )
-@click.option("--model-name", "-n", type=str)
-@click.option("--size-in-billions", "-s", default=None, type=int)
-@click.option("--model-format", "-f", default=None, type=str)
-@click.option("--quantization", "-q", default=None, type=str)
 def model_launch(
     endpoint: Optional[str],
     model_name: str,
@@ -279,11 +389,15 @@ def model_launch(
     print(f"Model uid: {model_uid}", file=sys.stderr)
 
 
-@cli.command("list")
+@cli.command(
+    "list",
+    help="List either all built-in models or only the currently deployed models in Xinference.",
+)
 @click.option(
     "--endpoint",
     "-e",
     type=str,
+    help="Specify the endpoint URL for listing models.",
 )
 def model_list(endpoint: Optional[str]):
     from tabulate import tabulate
@@ -320,13 +434,22 @@ def model_list(endpoint: Optional[str]):
     )
 
 
-@cli.command("terminate")
+@cli.command(
+    "terminate",
+    help="Terminate a deployed model by its unique identifier (UID).",
+)
 @click.option(
     "--endpoint",
     "-e",
     type=str,
+    help="Specify the endpoint URL for terminating the model.",
+)
+@click.option(
+    "--model-uid",
+    type=str,
+    required=True,
+    help="Provide the unique identifier (UID) of the model to be terminated.",
 )
-@click.option("--model-uid", type=str)
 def model_terminate(
     endpoint: Optional[str],
     model_uid: str,
@@ -337,15 +460,25 @@ def model_terminate(
     client.terminate_model(model_uid=model_uid)
 
 
-@cli.command("generate")
+@cli.command("generate", help="Generates text using a specified model in Xinference.")
+@click.option("--endpoint", "-e", type=str, help="Endpoint URL for generating text.")
 @click.option(
-    "--endpoint",
-    "-e",
+    "--model-uid",
     type=str,
+    help="Unique identifier of the model to use for text generation.",
+)
+@click.option(
+    "--max_tokens",
+    default=256,
+    type=int,
+    help="Maximum number of tokens in the generated text (default is 256).",
+)
+@click.option(
+    "--stream",
+    default=True,
+    type=bool,
+    help="Whether to stream the generated text. Use 'True' for streaming (default is True).",
 )
-@click.option("--model-uid", type=str)
-@click.option("--max_tokens", default=256, type=int)
-@click.option("--stream", default=True, type=bool)
 def model_generate(
     endpoint: Optional[str],
     model_uid: str,
@@ -417,15 +550,25 @@ async def generate_internal():
             print(f"{response['choices'][0]['text']}\n", file=sys.stdout)
 
 
-@cli.command("chat")
+@cli.command(
+    "chat", help="Engage in a chat session with a specified model in Xinference."
+)
+@click.option("--endpoint", "-e", type=str, help="Endpoint URL for the chat session.")
 @click.option(
-    "--endpoint",
-    "-e",
-    type=str,
+    "--model-uid", type=str, help="Unique identifier of the model to use for chatting."
+)
+@click.option(
+    "--max_tokens",
+    default=256,
+    type=int,
+    help="Maximum number of tokens in each message (default is 256).",
+)
+@click.option(
+    "--stream",
+    default=True,
+    type=bool,
+    help="Whether to stream the chat messages. Use 'True' for streaming (default is True).",
 )
-@click.option("--model-uid", type=str)
-@click.option("--max_tokens", default=256, type=int)
-@click.option("--stream", default=True, type=bool)
 def model_chat(
     endpoint: Optional[str],
     model_uid: str,