Skip to content

Commit

Permalink
bump fastdeploy_llm to v1.0.0
Browse files — browse the repository at this point in the history
  • Loading branch information
rainyfly committed Dec 26, 2023
1 parent 4749a60 commit fc61eb4
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 4 deletions.
2 changes: 1 addition & 1 deletion llm/fastdeploy_llm/serving/serving_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def runner(self):
task = self.requests_queue.get() # only block when get first data
else:
task = self.requests_queue.get(timeout=0.01) # wait only 10ms for batch
get_tasks.append(task)
get_tasks.append(task)
except Exception as e:
break

Expand Down
7 changes: 5 additions & 2 deletions llm/fastdeploy_llm/serving/triton_model_nonstream.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@
import functools
from collections import defaultdict
from fastdeploy_llm.serving.serving_model import ServingModel
from fastdeploy_llm.utils.logging_util import logger
from fastdeploy_llm.utils.logging_util import logger, warning_logger
from fastdeploy_llm.utils.logging_util import error_format, ErrorCode, ErrorType
from fastdeploy_llm.task import Task, BatchTask
import fastdeploy_llm as fdlm
import queue
Expand Down Expand Up @@ -51,7 +52,8 @@ def stream_call_back(call_back_task, token_tuple, index, is_last_token,
out_tensor = pb_utils.Tensor(
"OUT", np.array(
[json.dumps(out)], dtype=np.object_))
response_dict[call_back_task.task_id] = out_tensor
response = pb_utils.InferenceResponse([out_tensor])
response_dict[call_back_task.task_id] = response
response_finished_queue.put(call_back_task.task_id)

logger.info("Model output for req_id: {} results_all: {} tokens_all: {}".format(call_back_task.task_id, all_strs, all_token_ids))
Expand Down Expand Up @@ -233,6 +235,7 @@ def execute(self, requests):
responses[index] = response_dict[task_id]
del inflight_valid_tasks[task_id]
del response_dict[task_id]
del self.response_handler[task_id]
except:
for task_id, index in inflight_valid_tasks.items():
error_type = ErrorType.Query
Expand Down
2 changes: 1 addition & 1 deletion llm/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

setuptools.setup(
name="fastdeploy-llm",
version="0.0.9",
version="1.0.0",
author="fastdeploy",
author_email="fastdeploy@baidu.com",
description="FastDeploy for Large Language Model",
Expand Down

0 comments on commit fc61eb4

Please sign in to comment.