diff --git a/agentverse/llms/openai.py b/agentverse/llms/openai.py
index 700c826b5..c119dcd80 100644
--- a/agentverse/llms/openai.py
+++ b/agentverse/llms/openai.py
@@ -113,6 +113,7 @@ def __init__(self, max_retry: int = 3, **kwargs):
             logger.warn(f"Unused arguments: {kwargs}")
         if args["model"] in LOCAL_LLMS:
             openai.api_base = "http://localhost:5000/v1"
+            openai.api_key = "EMPTY"
         super().__init__(args=args, max_retry=max_retry)

     @classmethod
diff --git a/scripts/run_local_model_server.sh b/scripts/run_local_model_server.sh
index 2ea115506..b4c2cfe73 100644
--- a/scripts/run_local_model_server.sh
+++ b/scripts/run_local_model_server.sh
@@ -3,8 +3,12 @@
 # export CUDA_VISIBLE_DEVICES=0
 MODEL_PATH="meta-llama/Llama-2-7b-chat-hf" # path_to_the_downloaded_model_dir
 MODEL_NAME="llama-2-7b-chat-hf" # name_of_the_model
-python3 -m fastchat.serve.controller & \
+CONTROLLER_PORT=20002
+python3 -m fastchat.serve.controller --host 127.0.0.1 --port ${CONTROLLER_PORT} & \
 python3 -m fastchat.serve.multi_model_worker \
     --model-path ${MODEL_PATH} \
-    --model-names ${MODEL_NAME} & \
-python3 -m fastchat.serve.openai_api_server --host localhost --port 5000
+    --model-names ${MODEL_NAME} \
+    --host 127.0.0.1 \
+    --controller-address http://127.0.0.1:${CONTROLLER_PORT} \
+    --worker-address http://127.0.0.1:21002 & \
+python3 -m fastchat.serve.openai_api_server --host 127.0.0.1 --port 5000 --controller-address http://127.0.0.1:${CONTROLLER_PORT}