Commit: add help messages

Bojun-Feng committed Aug 21, 2023
1 parent 7ed7a02 commit 400c55a
Showing 1 changed file with 191 additions and 48 deletions.
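For context, the change applied throughout this diff is Click's `help=` parameter: each `@click.command(...)` and `@click.option(...)` decorator gains a help string, which Click renders whenever the command is invoked with `--help`. A minimal, self-contained sketch of the same pattern (the command and option names here are illustrative, not taken from this commit):

import click


@click.command(help="Toy command showing how Click help strings surface.")
@click.option(
    "--log-level",
    default="INFO",
    type=str,
    help="Set the logger level, e.g. DEBUG or INFO (default is INFO).",
)
def toy(log_level: str):
    # The help= strings above are rendered only for `--help`;
    # a normal invocation just runs the command body.
    click.echo(f"log level: {log_level}")


if __name__ == "__main__":
    toy()

Running `python toy.py --help` prints the command's description followed by an options list built from each option's `help=` string.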
xinference/deploy/cmdline.py
@@ -84,12 +84,39 @@ def get_endpoint(endpoint: Optional[str]) -> str:
     return endpoint


-@click.group(invoke_without_command=True, name="xinference")
+@click.group(
+    invoke_without_command=True,
+    name="xinference",
+    help="Xinference command-line interface for serving and deploying models.",
+)
 @click.pass_context
-@click.version_option(__version__, "--version", "-v")
-@click.option("--log-level", default="INFO", type=str)
-@click.option("--host", "-H", default=XINFERENCE_DEFAULT_LOCAL_HOST, type=str)
-@click.option("--port", "-p", default=XINFERENCE_DEFAULT_ENDPOINT_PORT, type=int)
+@click.version_option(
+    __version__,
+    "--version",
+    "-v",
+    help="Show the current version of the Xinference tool.",
+)
+@click.option(
+    "--log-level",
+    default="INFO",
+    type=str,
+    help="""Set the logger level. Options listed from most log to least log are:
+    ALL > TRACE > DEBUG > INFO > WARN > ERROR > FATAL > OFF (Default level is INFO)""",
+)
+@click.option(
+    "--host",
+    "-H",
+    default=XINFERENCE_DEFAULT_LOCAL_HOST,
+    type=str,
+    help="Specify the host address for the Xinference server.",
+)
+@click.option(
+    "--port",
+    "-p",
+    default=XINFERENCE_DEFAULT_ENDPOINT_PORT,
+    type=int,
+    help="Specify the port number for the Xinference server.",
+)
 def cli(
     ctx,
     log_level: str,
@@ -114,10 +141,30 @@ def cli(
     )


-@click.command()
-@click.option("--log-level", default="INFO", type=str)
-@click.option("--host", "-H", default=XINFERENCE_DEFAULT_DISTRIBUTED_HOST, type=str)
-@click.option("--port", "-p", default=XINFERENCE_DEFAULT_ENDPOINT_PORT, type=int)
+@click.command(
+    help="Starts a Xinference supervisor to control and monitor the worker actors."
+)
+@click.option(
+    "--log-level",
+    default="INFO",
+    type=str,
+    help="""Set the logger level for the supervisor. Options listed from most log to least log are:
+    ALL > TRACE > DEBUG > INFO > WARN > ERROR > FATAL > OFF (Default level is INFO)""",
+)
+@click.option(
+    "--host",
+    "-H",
+    default=XINFERENCE_DEFAULT_DISTRIBUTED_HOST,
+    type=str,
+    help="Specify the host address for the supervisor.",
+)
+@click.option(
+    "--port",
+    "-p",
+    default=XINFERENCE_DEFAULT_ENDPOINT_PORT,
+    type=int,
+    help="Specify the port number for the supervisor.",
+)
 def supervisor(
     log_level: str,
     host: str,
@@ -134,14 +181,26 @@ def supervisor(
     main(address=address, host=host, port=port, logging_conf=logging_conf)


-@click.command()
-@click.option("--log-level", default="INFO", type=str)
+@click.command(
+    help="Starts a Xinference worker to execute tasks assigned by the supervisor in a distributed setup."
+)
 @click.option(
-    "--endpoint",
-    "-e",
+    "--log-level",
+    default="INFO",
     type=str,
+    help="""Set the logger level for the worker. Options listed from most log to least log are:
+    ALL > TRACE > DEBUG > INFO > WARN > ERROR > FATAL > OFF (Default level is INFO)""",
+)
+@click.option(
+    "--endpoint", "-e", type=str, help="Specify the endpoint URL for the worker."
+)
+@click.option(
+    "--host",
+    "-H",
+    default=XINFERENCE_DEFAULT_DISTRIBUTED_HOST,
+    type=str,
+    help="Specify the host address for the worker.",
 )
-@click.option("--host", "-H", default=XINFERENCE_DEFAULT_DISTRIBUTED_HOST, type=str)
 def worker(log_level: str, endpoint: Optional[str], host: str):
     from ..deploy.worker import main

@@ -163,15 +222,24 @@ def worker(log_level: str, endpoint: Optional[str], host: str):
     )


-@cli.command("register")
+@cli.command("register", help="Registers a new model with Xinference for deployment.")
 @click.option(
-    "--endpoint",
-    "-e",
-    type=str,
+    "--endpoint", "-e", type=str, help="Endpoint URL for registering the model."
 )
-@click.option("--model-type", "-t", default="LLM", type=str)
-@click.option("--file", "-f", type=str)
-@click.option("--persist", "-p", is_flag=True)
+@click.option(
+    "--model-type",
+    "-t",
+    default="LLM",
+    type=str,
+    help="Type of model to register (default is 'LLM').",
+)
+@click.option("--file", "-f", type=str, help="Path to the file containing the model.")
+@click.option(
+    "--persist",
+    "-p",
+    is_flag=True,
+    help="Persist the model to the filesystem, retains the model after server restarts.",
+)
 def register_model(
     endpoint: Optional[str],
     model_type: str,
@@ -190,14 +258,21 @@ def register_model(
     )


-@cli.command("unregister")
+@cli.command(
+    "unregister",
+    help="Unregisters a model from Xinference, removing it from deployment.",
+)
 @click.option(
-    "--endpoint",
-    "-e",
-    type=str,
+    "--endpoint", "-e", type=str, help="Endpoint URL for unregistering the model."
 )
-@click.option("--model-type", "-t", default="LLM", type=str)
-@click.option("--model-name", "-n", type=str)
+@click.option(
+    "--model-type",
+    "-t",
+    default="LLM",
+    type=str,
+    help="Type of model to unregister (default is 'LLM').",
+)
+@click.option("--model-name", "-n", type=str, help="Name of the model to unregister.")
 def unregister_model(
     endpoint: Optional[str],
     model_type: str,
@@ -212,13 +287,20 @@ def unregister_model(
     )


-@cli.command("registrations")
+@cli.command("registrations", help="Lists all registered models in Xinference.")
 @click.option(
     "--endpoint",
     "-e",
     type=str,
+    help="Endpoint URL to retrieve the list of registrations.",
 )
-@click.option("--model-type", "-t", default="LLM", type=str)
+@click.option(
+    "--model-type",
+    "-t",
+    default="LLM",
+    type=str,
+    help="Filter by model type (default is 'LLM').",
+)
 def list_model_registrations(
     endpoint: Optional[str],
     model_type: str,
@@ -249,16 +331,44 @@ def list_model_registrations(
     )


-@cli.command("launch")
+@cli.command(
+    "launch",
+    help="Launch a model with the Xinference framework with the given parameters.",
+)
 @click.option(
     "--endpoint",
     "-e",
     type=str,
+    help="Specify the endpoint URL for launching the model.",
 )
-@click.option("--model-name", "-n", type=str)
-@click.option("--size-in-billions", "-s", default=None, type=int)
-@click.option("--model-format", "-f", default=None, type=str)
-@click.option("--quantization", "-q", default=None, type=str)
+@click.option(
+    "--model-name",
+    "-n",
+    type=str,
+    required=True,
+    help="Provide the name of the model to be launched.",
+)
+@click.option(
+    "--size-in-billions",
+    "-s",
+    default=None,
+    type=int,
+    help="Specify the model size in billions of parameters.",
+)
+@click.option(
+    "--model-format",
+    "-f",
+    default=None,
+    type=str,
+    help="Specify the format of the model, e.g. pytorch, ggmlv3, etc.",
+)
+@click.option(
+    "--quantization",
+    "-q",
+    default=None,
+    type=str,
+    help="Define the quantization settings for the model.",
+)
 def model_launch(
     endpoint: Optional[str],
     model_name: str,
@@ -279,11 +389,15 @@ def model_launch(
     print(f"Model uid: {model_uid}", file=sys.stderr)


-@cli.command("list")
+@cli.command(
+    "list",
+    help="List either all built-in models or only the currently deployed models in Xinference.",
+)
 @click.option(
     "--endpoint",
     "-e",
     type=str,
+    help="Specify the endpoint URL for listing models.",
 )
 def model_list(endpoint: Optional[str]):
     from tabulate import tabulate
@@ -320,13 +434,22 @@ def model_list(endpoint: Optional[str]):
     )


-@cli.command("terminate")
+@cli.command(
+    "terminate",
+    help="Terminate a deployed model through unique identifier (UID) of the model.",
+)
 @click.option(
     "--endpoint",
     "-e",
     type=str,
+    help="Specify the endpoint URL for terminating the model.",
 )
-@click.option("--model-uid", type=str)
+@click.option(
+    "--model-uid",
+    type=str,
+    required=True,
+    help="Provide the unique identifier (UID) of the model to be terminated.",
+)
 def model_terminate(
     endpoint: Optional[str],
     model_uid: str,
@@ -337,15 +460,25 @@ def model_terminate(
     client.terminate_model(model_uid=model_uid)


-@cli.command("generate")
+@cli.command("generate", help="Generates text using a specified model in Xinference.")
+@click.option("--endpoint", "-e", type=str, help="Endpoint URL for generating text.")
 @click.option(
-    "--endpoint",
-    "-e",
+    "--model-uid",
     type=str,
+    help="Unique identifier of the model to use for text generation.",
 )
-@click.option("--model-uid", type=str)
-@click.option("--max_tokens", default=256, type=int)
-@click.option("--stream", default=True, type=bool)
+@click.option(
+    "--max_tokens",
+    default=256,
+    type=int,
+    help="Maximum number of tokens in the generated text (default is 256).",
+)
+@click.option(
+    "--stream",
+    default=True,
+    type=bool,
+    help="Whether to stream the generated text. Use 'True' for streaming (default is True).",
+)
 def model_generate(
     endpoint: Optional[str],
     model_uid: str,
@@ -417,15 +550,25 @@ async def generate_internal():
     print(f"{response['choices'][0]['text']}\n", file=sys.stdout)


-@cli.command("chat")
+@cli.command(
+    "chat", help="Engage in a chat session with a specified model in Xinference."
+)
+@click.option("--endpoint", "-e", type=str, help="Endpoint URL for the chat session.")
 @click.option(
-    "--endpoint",
-    "-e",
-    type=str,
+    "--model-uid", type=str, help="Unique identifier of the model to use for chatting."
 )
-@click.option("--model-uid", type=str)
-@click.option("--max_tokens", default=256, type=int)
-@click.option("--stream", default=True, type=bool)
+@click.option(
+    "--max_tokens",
+    default=256,
+    type=int,
+    help="Maximum number of tokens in each message (default is 256).",
+)
+@click.option(
+    "--stream",
+    default=True,
+    type=bool,
+    help="Whether to stream the chat messages. Use 'True' for streaming (default is True).",
+)
 def model_chat(
     endpoint: Optional[str],
     model_uid: str,
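One way to verify help strings like the ones added above, without starting any Xinference server, is Click's built-in test runner. A sketch, assuming the illustrative `toy` command defined near the top of this page:

from click.testing import CliRunner


runner = CliRunner()
result = runner.invoke(toy, ["--help"])
# result.output holds the rendered help text, including the
# command description and each option's help= string.
print(result.output)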
