Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -204,9 +204,9 @@ def enable_open_telemetry(request):
Fixture to enable OpenTelemetry for the test.
"""
if request.param:
os.environ["RAY_enable_open_telemetry"] = "1"
os.environ["RAY_enable_open_telemetry"] = "true"
else:
os.environ["RAY_enable_open_telemetry"] = "0"
os.environ["RAY_enable_open_telemetry"] = "false"
yield
os.environ.pop("RAY_enable_open_telemetry", None)
Comment on lines 206 to 211
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

To improve maintainability and avoid magic strings, you can define the environment variable name as a local variable. This also allows for making the code more concise.

Suggested change
if request.param:
os.environ["RAY_enable_open_telemetry"] = "1"
os.environ["RAY_enable_open_telemetry"] = "true"
else:
os.environ["RAY_enable_open_telemetry"] = "0"
os.environ["RAY_enable_open_telemetry"] = "false"
yield
os.environ.pop("RAY_enable_open_telemetry", None)
env_var = "RAY_enable_open_telemetry"
os.environ[env_var] = "true" if request.param else "false"
yield
os.environ.pop(env_var, None)


Expand Down
2 changes: 1 addition & 1 deletion python/ray/tests/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ py_test_module_list(
py_test_module_list(
size = "medium",
env = {
"RAY_enable_open_telemetry": "1",
"RAY_enable_open_telemetry": "true",
},
files = [
"test_metric_cardinality.py",
Expand Down
5 changes: 3 additions & 2 deletions python/ray/tests/test_metric_cardinality.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
TASK_OR_ACTOR_NAME_TAG_KEY,
)

from ray._private.ray_constants import RAY_ENABLE_OPEN_TELEMETRY

try:
import prometheus_client
Expand All @@ -41,7 +42,7 @@ def _setup_cluster_for_test(request, ray_start_cluster):
"metrics_report_interval_ms": 1000,
"enable_metrics_collection": True,
"metric_cardinality_level": core_metric_cardinality_level,
"enable_open_telemetry": os.getenv("RAY_enable_open_telemetry") == "1",
"enable_open_telemetry": RAY_ENABLE_OPEN_TELEMETRY,
}
)
cluster.wait_for_nodes()
Expand Down Expand Up @@ -151,7 +152,7 @@ def test_cardinality_recommended_and_legacy_levels(
# implementation doesn't support low cardinality.
@pytest.mark.skipif(prometheus_client is None, reason="Prometheus not installed")
@pytest.mark.skipif(
os.getenv("RAY_enable_open_telemetry") != "1",
not RAY_ENABLE_OPEN_TELEMETRY,
reason="OpenTelemetry is not enabled",
)
@pytest.mark.parametrize(
Expand Down
11 changes: 7 additions & 4 deletions python/ray/tests/test_metrics_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,10 @@
Gauge as MetricsAgentGauge,
PrometheusServiceDiscoveryWriter,
)
from ray._private.ray_constants import PROMETHEUS_SERVICE_DISCOVERY_FILE
from ray._private.ray_constants import (
PROMETHEUS_SERVICE_DISCOVERY_FILE,
RAY_ENABLE_OPEN_TELEMETRY,
)
from ray._private.test_utils import (
PrometheusTimeseries,
fetch_prometheus_metric_timeseries,
Expand Down Expand Up @@ -204,7 +207,7 @@ def _setup_cluster_for_test(request, ray_start_cluster):
"event_stats_print_interval_ms": 500,
"event_stats": True,
"enable_metrics_collection": enable_metrics_collection,
"enable_open_telemetry": os.getenv("RAY_enable_open_telemetry") == "1",
"enable_open_telemetry": RAY_ENABLE_OPEN_TELEMETRY,
}
)
# Add worker nodes.
Expand Down Expand Up @@ -320,7 +323,7 @@ def test_cases():
"test_driver_counter_total",
"test_gauge",
]
if os.environ.get("RAY_enable_open_telemetry") != "1"
if not RAY_ENABLE_OPEN_TELEMETRY
else [
"test_counter_total",
"test_driver_counter_total",
Expand Down Expand Up @@ -744,7 +747,7 @@ def wrap_test_case_for_retry():

@pytest.mark.skipif(sys.platform == "win32", reason="Not working in Windows.")
@pytest.mark.skipif(
os.environ.get("RAY_enable_open_telemetry") == "1",
RAY_ENABLE_OPEN_TELEMETRY,
reason="OpenTelemetry backend does not support Counter exported as gauge.",
)
def test_counter_exported_as_gauge(shutdown_only):
Expand Down
4 changes: 2 additions & 2 deletions python/ray/util/metrics.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import logging
import os
import re
import warnings
from typing import Any, Dict, List, Optional, Tuple, Union

from ray._private.ray_constants import env_bool
from ray._raylet import (
Count as CythonCount,
Gauge as CythonGauge,
Expand Down Expand Up @@ -198,7 +198,7 @@ def __init__(
if self._discard_metric:
self._metric = None
else:
if os.environ.get("RAY_enable_open_telemetry") == "1":
if env_bool("RAY_enable_open_telemetry", False):
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: Inconsistent Env Evaluation Between Metrics and Ray Internal Measures

Inconsistent environment variable evaluation timing. The change uses env_bool("RAY_enable_open_telemetry", False) which evaluates the environment variable at runtime (when Counter.init is called), while other parts of the codebase use the RAY_ENABLE_OPEN_TELEMETRY constant which is evaluated at module import time. This creates undefined behavior when the environment variable is set after importing ray.util.metrics but before creating a Counter instance. Since ray.util.metrics is a public API, users could set the environment variable at any time, leading to inconsistent behavior between the metrics module and internal Ray components. The fix should import and use RAY_ENABLE_OPEN_TELEMETRY constant instead of calling env_bool() at runtime.

Fix in Cursor Fix in Web

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it makes sense for test to evaluate at import time; in non-test code, it is consistently that the value will be evaluated at runtime

"""
For the new opentelemetry implementation, we'll correctly use Counter
rather than Sum.
Expand Down
2 changes: 1 addition & 1 deletion src/ray/stats/tests/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ ray_cc_test(
size = "small",
srcs = ["metric_with_open_telemetry_test.cc"],
env = {
"RAY_enable_open_telemetry": "1",
"RAY_enable_open_telemetry": "true",
},
tags = ["team:core"],
deps = [
Expand Down