The log output is as follows:
INFO 06-10 21:39:09 async_llm_engine.py:553] Received request cmpl-8661e993f54d4529b989a4c74491433b: prompt: '[gMASK]<|user|>\nhello<|assistant|>', params: SamplingParams(n=1, best_of=1, presence_penalty=0.0, frequency_penalty=0.0, repetition_penalty=1.0, temperature=0.7, top_p=1.0, top_k=-1, min_p=0.0, seed=None, use_beam_search=False, length_penalty=1.0, early_stopping=False, stop=[], stop_token_ids=[], include_stop_str_in_output=False, ignore_eos=False, max_tokens=524282, min_tokens=0, logprobs=None, prompt_logprobs=None, skip_special_tokens=True, spaces_between_special_tokens=True, truncate_prompt_tokens=None), prompt_token_ids: [151331, 151333, 151336, 198, 14978, 151337], lora_request: None.
INFO: 10.59.37.65:59775 - "POST /v1/chat/completions HTTP/1.1" 500 Internal Server Error
ERROR: Exception in ASGI application
Traceback (most recent call last):
File "/workspace/miniconda3/lib/python3.10/site-packages/uvicorn/protocols/http/httptools_impl.py", line 411, in run_asgi
result = await app( # type: ignore[func-returns-value]
File "/workspace/miniconda3/lib/python3.10/site-packages/uvicorn/middleware/proxy_headers.py", line 69, in call
return await self.app(scope, receive, send)
File "/workspace/miniconda3/lib/python3.10/site-packages/fastapi/applications.py", line 1054, in call
await super().call(scope, receive, send)
File "/workspace/miniconda3/lib/python3.10/site-packages/starlette/applications.py", line 123, in call
await self.middleware_stack(scope, receive, send)
File "/workspace/miniconda3/lib/python3.10/site-packages/starlette/middleware/errors.py", line 186, in call
raise exc
File "/workspace/miniconda3/lib/python3.10/site-packages/starlette/middleware/errors.py", line 164, in call
await self.app(scope, receive, _send)
File "/workspace/miniconda3/lib/python3.10/site-packages/starlette/middleware/cors.py", line 85, in call
await self.app(scope, receive, send)
File "/workspace/miniconda3/lib/python3.10/site-packages/starlette/middleware/exceptions.py", line 65, in call
await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
File "/workspace/miniconda3/lib/python3.10/site-packages/starlette/_exception_handler.py", line 64, in wrapped_app
raise exc
File "/workspace/miniconda3/lib/python3.10/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
await app(scope, receive, sender)
File "/workspace/miniconda3/lib/python3.10/site-packages/starlette/routing.py", line 756, in call
await self.middleware_stack(scope, receive, send)
File "/workspace/miniconda3/lib/python3.10/site-packages/starlette/routing.py", line 776, in app
await route.handle(scope, receive, send)
File "/workspace/miniconda3/lib/python3.10/site-packages/starlette/routing.py", line 297, in handle
await self.app(scope, receive, send)
File "/workspace/miniconda3/lib/python3.10/site-packages/starlette/routing.py", line 77, in app
await wrap_app_handling_exceptions(app, request)(scope, receive, send)
File "/workspace/miniconda3/lib/python3.10/site-packages/starlette/_exception_handler.py", line 64, in wrapped_app
raise exc
File "/workspace/miniconda3/lib/python3.10/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
await app(scope, receive, sender)
File "/workspace/miniconda3/lib/python3.10/site-packages/starlette/routing.py", line 72, in app
response = await func(request)
File "/workspace/miniconda3/lib/python3.10/site-packages/fastapi/routing.py", line 278, in app
raw_response = await run_endpoint_function(
File "/workspace/miniconda3/lib/python3.10/site-packages/fastapi/routing.py", line 191, in run_endpoint_function
return await dependant.call(**values)
File "/workspace/miniconda3/lib/python3.10/site-packages/vllm/entrypoints/openai/api_server.py", line 103, in create_chat_completion
generator = await openai_serving_chat.create_chat_completion(
File "/workspace/miniconda3/lib/python3.10/site-packages/vllm/entrypoints/openai/serving_chat.py", line 198, in create_chat_completion
return await self.chat_completion_full_generator(
File "/workspace/miniconda3/lib/python3.10/site-packages/vllm/entrypoints/openai/serving_chat.py", line 360, in chat_completion_full_generator
async for res in result_generator:
File "/workspace/miniconda3/lib/python3.10/site-packages/vllm/engine/async_llm_engine.py", line 662, in generate
async for output in self._process_request(
File "/workspace/miniconda3/lib/python3.10/site-packages/vllm/engine/async_llm_engine.py", line 769, in _process_request
raise e
File "/workspace/miniconda3/lib/python3.10/site-packages/vllm/engine/async_llm_engine.py", line 765, in _process_request
async for request_output in stream:
File "/workspace/miniconda3/lib/python3.10/site-packages/vllm/engine/async_llm_engine.py", line 80, in anext
raise result
File "/workspace/miniconda3/lib/python3.10/site-packages/vllm/engine/async_llm_engine.py", line 40, in _raise_exception_on_finish
task.result()
File "/workspace/miniconda3/lib/python3.10/site-packages/vllm/engine/async_llm_engine.py", line 521, in run_engine_loop
has_requests_in_progress = await asyncio.wait_for(
File "/workspace/miniconda3/lib/python3.10/asyncio/tasks.py", line 445, in wait_for
return fut.result()
File "/workspace/miniconda3/lib/python3.10/site-packages/vllm/engine/async_llm_engine.py", line 495, in engine_step
request_outputs = await self.engine.step_async()
File "/workspace/miniconda3/lib/python3.10/site-packages/vllm/engine/async_llm_engine.py", line 226, in step_async
output = await self.model_executor.execute_model_async(
File "/workspace/miniconda3/lib/python3.10/site-packages/vllm/executor/distributed_gpu_executor.py", line 166, in execute_model_async
return await self._driver_execute_model_async(execute_model_req)
File "/workspace/miniconda3/lib/python3.10/site-packages/vllm/executor/ray_gpu_executor.py", line 324, in _driver_execute_model_async
return await self.driver_exec_method("execute_model",
File "/workspace/miniconda3/lib/python3.10/concurrent/futures/thread.py", line 58, in run
result = self.fn(*self.args, **self.kwargs)
File "/workspace/miniconda3/lib/python3.10/site-packages/vllm/worker/worker_base.py", line 149, in execute_method
raise e
File "/workspace/miniconda3/lib/python3.10/site-packages/vllm/worker/worker_base.py", line 140, in execute_method
return executor(*args, **kwargs)
File "/workspace/miniconda3/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/workspace/miniconda3/lib/python3.10/site-packages/vllm/worker/worker.py", line 272, in execute_model
output = self.model_runner.execute_model(seq_group_metadata_list,
File "/workspace/miniconda3/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/workspace/miniconda3/lib/python3.10/site-packages/vllm/worker/model_runner.py", line 728, in execute_model
hidden_states = model_executable(**execute_model_kwargs)
File "/workspace/miniconda3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/workspace/miniconda3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/workspace/miniconda3/lib/python3.10/site-packages/vllm/model_executor/models/chatglm.py", line 364, in forward
hidden_states = self.transformer(input_ids, positions, kv_caches,
File "/workspace/miniconda3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/workspace/miniconda3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/workspace/miniconda3/lib/python3.10/site-packages/vllm/model_executor/models/chatglm.py", line 316, in forward
hidden_states = self.encoder(
File "/workspace/miniconda3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/workspace/miniconda3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/workspace/miniconda3/lib/python3.10/site-packages/vllm/model_executor/models/chatglm.py", line 272, in forward
hidden_states = layer(
File "/workspace/miniconda3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/workspace/miniconda3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/workspace/miniconda3/lib/python3.10/site-packages/vllm/model_executor/models/chatglm.py", line 207, in forward
attention_output = self.self_attention(
File "/workspace/miniconda3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/workspace/miniconda3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/workspace/miniconda3/lib/python3.10/site-packages/vllm/model_executor/models/chatglm.py", line 106, in forward
context_layer = self.attn(
File "/workspace/miniconda3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/workspace/miniconda3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/workspace/miniconda3/lib/python3.10/site-packages/vllm/attention/layer.py", line 89, in forward
return self.impl.forward(query, key, value, kv_cache, attn_metadata,
File "/workspace/miniconda3/lib/python3.10/site-packages/vllm/attention/backends/xformers.py", line 305, in forward
out = PagedAttention.forward_prefix(
File "/workspace/miniconda3/lib/python3.10/site-packages/vllm/attention/ops/paged_attn.py", line 200, in forward_prefix
context_attention_fwd(
File "/workspace/miniconda3/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/workspace/miniconda3/lib/python3.10/site-packages/vllm/attention/ops/prefix_prefill.py", line 757, in context_attention_fwd
_fwd_kernel[grid](
File "/workspace/miniconda3/lib/python3.10/site-packages/triton/runtime/jit.py", line 167, in
return lambda *args, **kwargs: self.run(grid=grid, warmup=False, *args, **kwargs)
File "/workspace/miniconda3/lib/python3.10/site-packages/triton/runtime/jit.py", line 363, in run
device = driver.get_current_device()
File "/workspace/miniconda3/lib/python3.10/site-packages/triton/runtime/driver.py", line 209, in getattr
self._initialize_obj()
File "/workspace/miniconda3/lib/python3.10/site-packages/triton/runtime/driver.py", line 206, in _initialize_obj
self._obj = self._init_fn()
File "/workspace/miniconda3/lib/python3.10/site-packages/triton/runtime/driver.py", line 239, in initialize_driver
return CudaDriver()
File "/workspace/miniconda3/lib/python3.10/site-packages/triton/runtime/driver.py", line 102, in init
self.utils = CudaUtils()
File "/workspace/miniconda3/lib/python3.10/site-packages/triton/runtime/driver.py", line 49, in init
so = _build("cuda_utils", src_path, tmpdir)
File "/workspace/miniconda3/lib/python3.10/site-packages/triton/common/build.py", line 106, in _build
ret = subprocess.check_call(cc_cmd)
File "/workspace/miniconda3/lib/python3.10/subprocess.py", line 369, in check_call
raise CalledProcessError(retcode, cmd)
subprocess.CalledProcessError: Command '['/usr/bin/gcc', '/tmp/tmpl8ttprie/main.c', '-O3', '-I/workspace/miniconda3/lib/python3.10/site-packages/triton/common/../third_party/cuda/include', '-I/workspace/miniconda3/include/python3.10', '-I/tmp/tmpl8ttprie', '-shared', '-fPIC', '-lcuda', '-o', '/tmp/tmpl8ttprie/cuda_utils.cpython-310-x86_64-linux-gnu.so', '-L/usr/lib64']' returned non-zero exit status 1.
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/workspace/miniconda3/lib/python3.10/site-packages/uvicorn/protocols/http/httptools_impl.py", line 411, in run_asgi
result = await app( # type: ignore[func-returns-value]
File "/workspace/miniconda3/lib/python3.10/site-packages/uvicorn/middleware/proxy_headers.py", line 69, in call
return await self.app(scope, receive, send)
File "/workspace/miniconda3/lib/python3.10/site-packages/fastapi/applications.py", line 1054, in call
await super().call(scope, receive, send)
File "/workspace/miniconda3/lib/python3.10/site-packages/starlette/applications.py", line 123, in call
await self.middleware_stack(scope, receive, send)
File "/workspace/miniconda3/lib/python3.10/site-packages/starlette/middleware/errors.py", line 186, in call
raise exc
File "/workspace/miniconda3/lib/python3.10/site-packages/starlette/middleware/errors.py", line 164, in call
await self.app(scope, receive, _send)
File "/workspace/miniconda3/lib/python3.10/site-packages/starlette/middleware/cors.py", line 85, in call
await self.app(scope, receive, send)
File "/workspace/miniconda3/lib/python3.10/site-packages/starlette/middleware/exceptions.py", line 65, in call
await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
File "/workspace/miniconda3/lib/python3.10/site-packages/starlette/_exception_handler.py", line 64, in wrapped_app
raise exc
File "/workspace/miniconda3/lib/python3.10/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
await app(scope, receive, sender)
File "/workspace/miniconda3/lib/python3.10/site-packages/starlette/routing.py", line 756, in call
await self.middleware_stack(scope, receive, send)
File "/workspace/miniconda3/lib/python3.10/site-packages/starlette/routing.py", line 776, in app
await route.handle(scope, receive, send)
File "/workspace/miniconda3/lib/python3.10/site-packages/starlette/routing.py", line 297, in handle
await self.app(scope, receive, send)
File "/workspace/miniconda3/lib/python3.10/site-packages/starlette/routing.py", line 77, in app
await wrap_app_handling_exceptions(app, request)(scope, receive, send)
File "/workspace/miniconda3/lib/python3.10/site-packages/starlette/_exception_handler.py", line 64, in wrapped_app
raise exc
File "/workspace/miniconda3/lib/python3.10/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
await app(scope, receive, sender)
File "/workspace/miniconda3/lib/python3.10/site-packages/starlette/routing.py", line 72, in app
response = await func(request)
File "/workspace/miniconda3/lib/python3.10/site-packages/fastapi/routing.py", line 278, in app
raw_response = await run_endpoint_function(
File "/workspace/miniconda3/lib/python3.10/site-packages/fastapi/routing.py", line 191, in run_endpoint_function
return await dependant.call(**values)
File "/workspace/miniconda3/lib/python3.10/site-packages/vllm/entrypoints/openai/api_server.py", line 103, in create_chat_completion
generator = await openai_serving_chat.create_chat_completion(
File "/workspace/miniconda3/lib/python3.10/site-packages/vllm/entrypoints/openai/serving_chat.py", line 198, in create_chat_completion
return await self.chat_completion_full_generator(
File "/workspace/miniconda3/lib/python3.10/site-packages/vllm/entrypoints/openai/serving_chat.py", line 360, in chat_completion_full_generator
async for res in result_generator:
File "/workspace/miniconda3/lib/python3.10/site-packages/vllm/engine/async_llm_engine.py", line 662, in generate
async for output in self._process_request(
File "/workspace/miniconda3/lib/python3.10/site-packages/vllm/engine/async_llm_engine.py", line 756, in _process_request
stream = await self.add_request(
File "/workspace/miniconda3/lib/python3.10/site-packages/vllm/engine/async_llm_engine.py", line 561, in add_request
self.start_background_loop()
File "/workspace/miniconda3/lib/python3.10/site-packages/vllm/engine/async_llm_engine.py", line 431, in start_background_loop
raise AsyncEngineDeadError(
vllm.engine.async_llm_engine.AsyncEngineDeadError: Background loop has errored already.
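For context on the failure mode: the chained traceback above shows that the HTTP 500 is only a symptom. The first time the Triton prefix-prefill kernel is launched, Triton tries to build its cuda_utils helper with gcc, and that build fails (the CalledProcessError near the bottom of the first traceback, a gcc invocation that links against -lcuda with -L/usr/lib64). Once that happens the engine's background loop dies, and every later request is rejected with AsyncEngineDeadError. The log does not include gcc's own error output, so the commands below are only a diagnostic sketch for narrowing it down by hand; they are not from the original report, and the stubs path under /usr/local/cuda is an assumption about a typical CUDA toolkit layout.
sh-4.2$ ldconfig -p | grep libcuda                      # is the NVIDIA driver library registered with the dynamic loader?
sh-4.2$ ls -l /usr/lib64/libcuda*                       # the failing gcc command passes -L/usr/lib64
sh-4.2$ ls -l /usr/local/cuda/lib64/stubs/libcuda.so    # toolkit stub; the path is an assumption, adjust to the local install
sh-4.2$ echo 'int main(void){return 0;}' > /tmp/linkcheck.c
sh-4.2$ gcc /tmp/linkcheck.c -lcuda -L/usr/lib64 -o /tmp/linkcheck   # reuse the -lcuda / -L flags from the failing build step
If the last command fails with something like "cannot find -lcuda", gcc cannot see the driver library at all; making a directory that contains libcuda.so visible to the linker (for example via LIBRARY_PATH) before starting the server is the usual remedy for this class of Triton build failure, but the actual gcc message should be checked first.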
May I ask which package versions are needed to deploy this model? My deployment succeeded, but as soon as I call the API it returns a 500.
The versions in use are as follows:
sh-4.2$ pip list | grep -P "vllm|torch|cuda"
nvidia-cuda-cupti-cu12 12.1.105
nvidia-cuda-nvrtc-cu12 12.1.105
nvidia-cuda-runtime-cu12 12.1.105
torch 2.3.0
vllm 0.4.3+cu118
vllm-nccl-cu12 2.18.1.0.3.0
The resulting log is identical to the traceback above.
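On the version question itself: the list above pairs a vllm wheel tagged +cu118 with nvidia-cuda-*-cu12 12.1.105 runtime packages and torch 2.3.0, which suggests a CUDA 12.1 PyTorch build sitting next to a CUDA 11.8 vllm build. Before digging further it is worth confirming which CUDA runtime the environment actually resolves to; the commands below are a suggested check, not something from the original report.
sh-4.2$ python -c "import torch; print(torch.__version__, torch.version.cuda, torch.cuda.is_available())"
sh-4.2$ python -c "import vllm; print(vllm.__version__)"
sh-4.2$ nvidia-smi | head -n 3    # driver version and the highest CUDA version it supports
If the CUDA version torch reports does not match the vllm build or the installed driver, that mismatch is a plausible cause of runtime build failures like the gcc error above.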