Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CI/Build] Pin OpenTelemetry versions and make availability errors clearer #7266

Merged
merged 4 commits into from
Aug 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions .buildkite/test-pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -111,10 +111,10 @@ steps:
commands:
- pytest -v -s metrics
- "pip install \
opentelemetry-sdk \
opentelemetry-api \
opentelemetry-exporter-otlp \
opentelemetry-semantic-conventions-ai"
'opentelemetry-sdk>=1.26.0,<1.27.0' \
'opentelemetry-api>=1.26.0,<1.27.0' \
'opentelemetry-exporter-otlp>=1.26.0,<1.27.0' \
'opentelemetry-semantic-conventions-ai>=0.4.1,<0.5.0'"
- pytest -v -s tracing

##### fast check tests #####
Expand Down
8 changes: 4 additions & 4 deletions examples/production_monitoring/Otel.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
1. Install OpenTelemetry packages:
```
pip install \
opentelemetry-sdk \
opentelemetry-api \
opentelemetry-exporter-otlp \
opentelemetry-semantic-conventions-ai
'opentelemetry-sdk>=1.26.0,<1.27.0' \
'opentelemetry-api>=1.26.0,<1.27.0' \
'opentelemetry-exporter-otlp>=1.26.0,<1.27.0' \
'opentelemetry-semantic-conventions-ai>=0.4.1,<0.5.0'
```

1. Start Jaeger in a docker container:
Expand Down
10 changes: 6 additions & 4 deletions vllm/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS
from vllm.model_executor.models import ModelRegistry
from vllm.platforms import current_platform
from vllm.tracing import is_otel_installed
from vllm.tracing import is_otel_available, otel_import_error_traceback
from vllm.transformers_utils.config import get_config, get_hf_text_config
from vllm.utils import (STR_NOT_IMPL_ENC_DEC_CUDAGRAPH, GiB_bytes,
cuda_device_count_stateless, get_cpu_memory, is_cpu,
Expand Down Expand Up @@ -1721,9 +1721,11 @@ class ObservabilityConfig:
collect_model_execute_time: bool = False

def __post_init__(self):
if not is_otel_installed() and self.otlp_traces_endpoint is not None:
raise ValueError("OpenTelemetry packages must be installed before "
"configuring 'otlp_traces_endpoint'")
if not is_otel_available() and self.otlp_traces_endpoint is not None:
raise ValueError(
"OpenTelemetry is not available. Unable to configure "
"'otlp_traces_endpoint'. Ensure OpenTelemetry packages are "
f"installed. Original error:\n{otel_import_error_traceback}")

if ((self.collect_model_forward_time
or self.collect_model_execute_time)
Expand Down
24 changes: 17 additions & 7 deletions vllm/tracing.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@

logger = init_logger(__name__)

_is_otel_installed = False
_is_otel_imported = False
otel_import_error_traceback: Optional[str] = None
try:
from opentelemetry.context.context import Context
from opentelemetry.sdk.environment_variables import (
Expand All @@ -19,8 +20,14 @@
from opentelemetry.trace import SpanKind, Tracer, set_tracer_provider
from opentelemetry.trace.propagation.tracecontext import (
TraceContextTextMapPropagator)
_is_otel_installed = True
_is_otel_imported = True
except ImportError:
# Capture and format the traceback to provide detailed context for the import
# error. Only the formatted string is retained, not the exception object: a
# stored exception keeps its traceback frames (and their local variables)
# alive, which would leak memory.
# See https://github.com/vllm-project/vllm/pull/7266#discussion_r1707395458
import traceback
otel_import_error_traceback = traceback.format_exc()

class Context: # type: ignore
pass
Expand All @@ -35,14 +42,17 @@ class Tracer: # type: ignore
pass


def is_otel_installed() -> bool:
return _is_otel_installed
def is_otel_available() -> bool:
return _is_otel_imported


def init_tracer(instrumenting_module_name: str,
otlp_traces_endpoint: str) -> Optional[Tracer]:
assert is_otel_installed(), ("OpenTelemetry packages must be installed "
"prior to initializing a tracer")
if not is_otel_available():
raise ValueError(
"OpenTelemetry is not available. Unable to initialize "
"a tracer. Ensure OpenTelemetry packages are installed. "
f"Original error:\n{otel_import_error_traceback}")
trace_provider = TracerProvider()

span_exporter = get_span_exporter(otlp_traces_endpoint)
Expand Down Expand Up @@ -70,7 +80,7 @@ def get_span_exporter(endpoint):

def extract_trace_context(
headers: Optional[Mapping[str, str]]) -> Optional[Context]:
if is_otel_installed():
if is_otel_available():
headers = headers or {}
return TraceContextTextMapPropagator().extract(headers)
else:
Expand Down
Loading