diff --git a/google/cloud/storage/_http.py b/google/cloud/storage/_http.py index b4e16ebe4..5062e5103 100644 --- a/google/cloud/storage/_http.py +++ b/google/cloud/storage/_http.py @@ -18,6 +18,7 @@ from google.cloud import _http from google.cloud.storage import __version__ from google.cloud.storage import _helpers +from google.cloud.storage._opentelemetry_tracing import create_span class Connection(_http.JSONConnection): @@ -65,14 +66,25 @@ def __init__(self, client, client_info=None, api_endpoint=None): def api_request(self, *args, **kwargs): retry = kwargs.pop("retry", None) - kwargs["extra_api_info"] = _helpers._get_invocation_id() + invocation_id = _helpers._get_invocation_id() + kwargs["extra_api_info"] = invocation_id + span_attributes = { + "gccl-invocation-id": invocation_id, + } call = functools.partial(super(Connection, self).api_request, *args, **kwargs) - if retry: - # If this is a ConditionalRetryPolicy, check conditions. - try: - retry = retry.get_retry_policy_if_conditions_met(**kwargs) - except AttributeError: # This is not a ConditionalRetryPolicy. - pass + with create_span( + name="Storage.Connection.api_request", + attributes=span_attributes, + client=self._client, + api_request=kwargs, + retry=retry, + ): if retry: - call = retry(call) - return call() + # If this is a ConditionalRetryPolicy, check conditions. + try: + retry = retry.get_retry_policy_if_conditions_met(**kwargs) + except AttributeError: # This is not a ConditionalRetryPolicy. + pass + if retry: + call = retry(call) + return call() diff --git a/google/cloud/storage/_opentelemetry_tracing.py b/google/cloud/storage/_opentelemetry_tracing.py new file mode 100644 index 000000000..fe878e16c --- /dev/null +++ b/google/cloud/storage/_opentelemetry_tracing.py @@ -0,0 +1,100 @@ +"""Manages OpenTelemetry tracing span creation and handling.""" + +import logging +import os + +from contextlib import contextmanager + +from google.api_core import exceptions as api_exceptions +from google.api_core import retry as api_retry +from google.cloud.storage import __version__ +from google.cloud.storage.retry import ConditionalRetryPolicy + + +ENABLE_OTEL_TRACES_ENV_VAR = "ENABLE_GCS_PYTHON_CLIENT_OTEL_TRACES" +_DEFAULT_ENABLE_OTEL_TRACES_VALUE = False + +enable_otel_traces = os.environ.get( + ENABLE_OTEL_TRACES_ENV_VAR, _DEFAULT_ENABLE_OTEL_TRACES_VALUE +) +logger = logging.getLogger(__name__) + +try: + from opentelemetry import trace + + HAS_OPENTELEMETRY = True + +except ImportError: + logger.debug( + "This service is instrumented using OpenTelemetry. " + "OpenTelemetry or one of its components could not be imported; " + "please add compatible versions of opentelemetry-api and " + "opentelemetry-instrumentation packages in order to get Storage " + "Tracing data." + ) + HAS_OPENTELEMETRY = False + +_default_attributes = { + "rpc.service": "CloudStorage", + "rpc.system": "http", + "user_agent.original": f"gcloud-python/{__version__}", +} + + +@contextmanager +def create_span( + name, attributes=None, client=None, api_request=None, retry=None, **kwargs +): + """Creates a context manager for a new span and set it as the current span + in the configured tracer. If no configuration exists yields None.""" + if not HAS_OPENTELEMETRY or not enable_otel_traces: + yield None + return + + tracer = trace.get_tracer(__name__) + final_attributes = _get_final_attributes(attributes, client, api_request, retry) + # Yield new span. + with tracer.start_as_current_span( + name=name, kind=trace.SpanKind.CLIENT, attributes=final_attributes + ) as span: + try: + yield span + except api_exceptions.GoogleAPICallError as error: + span.set_status(trace.Status(trace.StatusCode.ERROR)) + span.record_exception(error) + raise + + +def _get_final_attributes(attributes=None, client=None, api_request=None, retry=None): + collected_attr = _default_attributes.copy() + if api_request: + collected_attr.update(_set_api_request_attr(api_request, client)) + if isinstance(retry, api_retry.Retry): + collected_attr.update(_set_retry_attr(retry)) + if isinstance(retry, ConditionalRetryPolicy): + collected_attr.update( + _set_retry_attr(retry.retry_policy, retry.conditional_predicate) + ) + if attributes: + collected_attr.update(attributes) + final_attributes = {k: v for k, v in collected_attr.items() if v is not None} + return final_attributes + + +def _set_api_request_attr(request, client): + attr = {} + if request.get("method"): + attr["http.request.method"] = request.get("method") + if request.get("path"): + path = request.get("path") + full_path = f"{client._connection.API_BASE_URL}{path}" + attr["url.full"] = full_path + if request.get("timeout"): + attr["connect_timeout,read_timeout"] = request.get("timeout") + return attr + + +def _set_retry_attr(retry, conditional_predicate=None): + predicate = conditional_predicate if conditional_predicate else retry._predicate + retry_info = f"multiplier{retry._multiplier}/deadline{retry._deadline}/max{retry._maximum}/initial{retry._initial}/predicate{predicate}" + return {"retry": retry_info} diff --git a/google/cloud/storage/client.py b/google/cloud/storage/client.py index 57bbab008..3b572f4d1 100644 --- a/google/cloud/storage/client.py +++ b/google/cloud/storage/client.py @@ -41,6 +41,7 @@ from google.cloud.storage._helpers import _STORAGE_HOST_TEMPLATE from google.cloud.storage._helpers import _NOW from google.cloud.storage._helpers import _UTC +from google.cloud.storage._opentelemetry_tracing import create_span from google.cloud.storage._http import Connection from google.cloud.storage._signing import ( @@ -337,6 +338,7 @@ def current_batch(self): """ return self._batch_stack.top + @create_span(name="Storage.Client.getServiceAccountEmail") def get_service_account_email( self, project=None, timeout=_DEFAULT_TIMEOUT, retry=DEFAULT_RETRY ): diff --git a/noxfile.py b/noxfile.py index fb3d8f89e..319ae207e 100644 --- a/noxfile.py +++ b/noxfile.py @@ -75,12 +75,16 @@ def lint_setup_py(session): session.run("python", "setup.py", "check", "--restructuredtext", "--strict") -def default(session): +def default(session, install_extras=True): constraints_path = str( CURRENT_DIRECTORY / "testing" / f"constraints-{session.python}.txt" ) # Install all test dependencies, then install this package in-place. session.install("mock", "pytest", "pytest-cov", "-c", constraints_path) + + if install_extras: + session.install("opentelemetry-api", "opentelemetry-sdk") + session.install("-e", ".", "-c", constraints_path) # Run py.test against the unit tests. diff --git a/setup.py b/setup.py index b2f5e411e..391bf7770 100644 --- a/setup.py +++ b/setup.py @@ -35,7 +35,12 @@ "requests >= 2.18.0, < 3.0.0dev", "google-crc32c >= 1.0, < 2.0dev", ] -extras = {"protobuf": ["protobuf<5.0.0dev"]} +extras = { + "protobuf": ["protobuf<5.0.0dev"], + "tracing": [ + "opentelemetry-api >= 1.1.0", + ], +} # Setup boilerplate below this line. diff --git a/tests/unit/test__opentelemetry_tracing.py b/tests/unit/test__opentelemetry_tracing.py new file mode 100644 index 000000000..760dab24f --- /dev/null +++ b/tests/unit/test__opentelemetry_tracing.py @@ -0,0 +1,223 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import importlib +import os +import pytest +import sys + +import mock +from google.api_core.exceptions import GoogleAPICallError +from google.cloud.storage import __version__ +from google.cloud.storage import _opentelemetry_tracing + + +@pytest.fixture +def setup(): + """Setup OTel packages and tracer provider.""" + try: + from opentelemetry import trace as trace_api + from opentelemetry.sdk.trace import TracerProvider, export + from opentelemetry.sdk.trace.export.in_memory_span_exporter import ( + InMemorySpanExporter, + ) + except ImportError: # pragma: NO COVER + pytest.skip("This test suite requires OpenTelemetry pacakges.") + + tracer_provider = TracerProvider() + memory_exporter = InMemorySpanExporter() + span_processor = export.SimpleSpanProcessor(memory_exporter) + tracer_provider.add_span_processor(span_processor) + trace_api.set_tracer_provider(tracer_provider) + importlib.reload(_opentelemetry_tracing) + yield memory_exporter + + +@pytest.fixture() +def mock_os_environ(monkeypatch): + """Mock os.environ.""" + monkeypatch.setattr(os, "environ", {}) + return os.environ + + +@pytest.fixture() +def setup_optin(mock_os_environ): + """Mock envar to opt-in tracing for storage client.""" + mock_os_environ["ENABLE_GCS_PYTHON_CLIENT_OTEL_TRACES"] = True + importlib.reload(_opentelemetry_tracing) + + +def test_opentelemetry_not_installed(setup, monkeypatch): + monkeypatch.setitem(sys.modules, "opentelemetry", None) + importlib.reload(_opentelemetry_tracing) + # Test no-ops when OpenTelemetry is not installed. + with _opentelemetry_tracing.create_span("No-ops w/o opentelemetry") as span: + assert span is None + assert not _opentelemetry_tracing.HAS_OPENTELEMETRY + + +def test_opentelemetry_no_trace_optin(setup): + assert _opentelemetry_tracing.HAS_OPENTELEMETRY + assert not _opentelemetry_tracing.enable_otel_traces + # Test no-ops when user has not opt-in. + # This prevents customers accidentally being billed for tracing. + with _opentelemetry_tracing.create_span("No-ops w/o opt-in") as span: + assert span is None + + +def test_enable_trace_yield_span(setup, setup_optin): + assert _opentelemetry_tracing.HAS_OPENTELEMETRY + assert _opentelemetry_tracing.enable_otel_traces + with _opentelemetry_tracing.create_span("No-ops for opentelemetry") as span: + assert span is not None + + +def test_enable_trace_call(setup, setup_optin): + from opentelemetry import trace as trace_api + + extra_attributes = { + "attribute1": "value1", + } + expected_attributes = { + "rpc.service": "CloudStorage", + "rpc.system": "http", + "user_agent.original": f"gcloud-python/{__version__}", + } + expected_attributes.update(extra_attributes) + + with _opentelemetry_tracing.create_span( + "OtelTracing.Test", attributes=extra_attributes + ) as span: + span.set_attribute("after_setup_attribute", 1) + + expected_attributes["after_setup_attribute"] = 1 + + assert span.kind == trace_api.SpanKind.CLIENT + assert span.attributes == expected_attributes + assert span.name == "OtelTracing.Test" + + +def test_enable_trace_error(setup, setup_optin): + from opentelemetry import trace as trace_api + + extra_attributes = { + "attribute1": "value1", + } + expected_attributes = { + "rpc.service": "CloudStorage", + "rpc.system": "http", + "user_agent.original": f"gcloud-python/{__version__}", + } + expected_attributes.update(extra_attributes) + + with pytest.raises(GoogleAPICallError): + with _opentelemetry_tracing.create_span( + "OtelTracing.Test", attributes=extra_attributes + ) as span: + from google.cloud.exceptions import NotFound + + assert span.kind == trace_api.SpanKind.CLIENT + assert span.attributes == expected_attributes + assert span.name == "OtelTracing.Test" + raise NotFound("Test catching NotFound error in trace span.") + + +def test_get_final_attributes(setup, setup_optin): + from google.api_core import retry as api_retry + + test_span_name = "OtelTracing.Test" + test_span_attributes = { + "foo": "bar", + } + api_request = { + "method": "GET", + "path": "/foo/bar/baz", + "timeout": (100, 100), + } + retry_obj = api_retry.Retry() + + expected_attributes = { + "foo": "bar", + "rpc.service": "CloudStorage", + "rpc.system": "http", + "user_agent.original": f"gcloud-python/{__version__}", + "http.request.method": "GET", + "url.full": "https://testOtel.org/foo/bar/baz", + "connect_timeout,read_timeout": (100, 100), + "retry": f"multiplier{retry_obj._multiplier}/deadline{retry_obj._deadline}/max{retry_obj._maximum}/initial{retry_obj._initial}/predicate{retry_obj._predicate}", + } + + with mock.patch("google.cloud.storage.client.Client") as test_client: + test_client.project = "test_project" + test_client._connection.API_BASE_URL = "https://testOtel.org" + with _opentelemetry_tracing.create_span( + test_span_name, + attributes=test_span_attributes, + client=test_client, + api_request=api_request, + retry=retry_obj, + ) as span: + assert span is not None + assert span.name == test_span_name + assert span.attributes == expected_attributes + + +def test_set_conditional_retry_attr(setup, setup_optin): + from google.api_core import retry as api_retry + from google.cloud.storage.retry import ConditionalRetryPolicy + + test_span_name = "OtelTracing.Test" + retry_policy = api_retry.Retry() + conditional_predicate = mock.Mock() + required_kwargs = ("kwarg",) + retry_obj = ConditionalRetryPolicy( + retry_policy, conditional_predicate, required_kwargs + ) + + expected_attributes = { + "rpc.service": "CloudStorage", + "rpc.system": "http", + "user_agent.original": f"gcloud-python/{__version__}", + "retry": f"multiplier{retry_policy._multiplier}/deadline{retry_policy._deadline}/max{retry_policy._maximum}/initial{retry_policy._initial}/predicate{conditional_predicate}", + } + + with _opentelemetry_tracing.create_span( + test_span_name, + retry=retry_obj, + ) as span: + assert span is not None + assert span.name == test_span_name + assert span.attributes == expected_attributes + + +def test_set_api_request_attr(): + from google.cloud.storage import Client + + test_client = Client() + args_method = {"method": "GET"} + expected_attributes = {"http.request.method": "GET"} + attr = _opentelemetry_tracing._set_api_request_attr(args_method, test_client) + assert attr == expected_attributes + + args_path = {"path": "/foo/bar/baz"} + expected_attributes = {"url.full": "https://storage.googleapis.com/foo/bar/baz"} + attr = _opentelemetry_tracing._set_api_request_attr(args_path, test_client) + assert attr == expected_attributes + + args_timeout = {"timeout": (100, 100)} + expected_attributes = { + "connect_timeout,read_timeout": (100, 100), + } + attr = _opentelemetry_tracing._set_api_request_attr(args_timeout, test_client) + assert attr == expected_attributes