Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add OpenTelemetry Tracing support as a preview feature #1288

Merged
merged 6 commits into from
Jun 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 55 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,61 @@ Windows
.\<your-env>\Scripts\activate
pip install google-cloud-storage


Tracing With OpenTelemetry
~~~~~~~~~~~~~~~~~~~~~~~~~~

This is a PREVIEW FEATURE: Coverage and functionality are still in development and subject to change.

This library can be configured to use `OpenTelemetry`_ to generate traces on calls to Google Cloud Storage.
For information on the benefits and utility of tracing, read the `Cloud Trace Overview <https://cloud.google.com/trace/docs/overview>`_.

To enable OpenTelemetry tracing in the Cloud Storage client, first install OpenTelemetry:

.. code-block:: console

pip install google-cloud-storage[tracing]

Set the ``ENABLE_GCS_PYTHON_CLIENT_OTEL_TRACES`` environment variable to selectively opt in to tracing for the Cloud Storage client:

.. code-block:: console

export ENABLE_GCS_PYTHON_CLIENT_OTEL_TRACES=True

You will also need to tell OpenTelemetry which exporter to use. An example to export traces to Google Cloud Trace can be found below.

.. code-block:: console

# Install the Google Cloud Trace exporter and propagator. You can substitute any exporter of your choice.
pip install opentelemetry-exporter-gcp-trace opentelemetry-propagator-gcp

# [Optional] Install the OpenTelemetry Requests Instrumentation to trace the underlying HTTP requests.
pip install opentelemetry-instrumentation-requests

.. code-block:: python

from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor
from opentelemetry.exporter.cloud_trace import CloudTraceSpanExporter

tracer_provider = TracerProvider()
tracer_provider.add_span_processor(BatchSpanProcessor(CloudTraceSpanExporter()))
trace.set_tracer_provider(tracer_provider)

# Optional, but recommended: instrument the requests HTTP library.
from opentelemetry.instrumentation.requests import RequestsInstrumentor
RequestsInstrumentor().instrument(tracer_provider=tracer_provider)

In this example, tracing data will be published to the `Google Cloud Trace`_ console.
Tracing is most effective when many libraries are instrumented to provide insight over the entire lifespan of a request.
For a list of libraries that can be instrumented, refer to the `OpenTelemetry Registry`_.

.. _OpenTelemetry: https://opentelemetry.io
.. _OpenTelemetry Registry: https://opentelemetry.io/ecosystem/registry
.. _Google Cloud Trace: https://cloud.google.com/trace


Next Steps
~~~~~~~~~~

Expand Down
30 changes: 21 additions & 9 deletions google/cloud/storage/_http.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from google.cloud import _http
from google.cloud.storage import __version__
from google.cloud.storage import _helpers
from google.cloud.storage._opentelemetry_tracing import create_trace_span


class Connection(_http.JSONConnection):
Expand Down Expand Up @@ -65,14 +66,25 @@ def __init__(self, client, client_info=None, api_endpoint=None):

def api_request(self, *args, **kwargs):
    """Issue an API request inside a tracing span, applying ``retry`` if given.

    The ``retry`` keyword is removed from ``kwargs`` before the request is
    built; every other positional and keyword argument is forwarded to the
    parent class's ``api_request``.  A fresh invocation id is attached to
    the request as ``extra_api_info`` and to the span as the
    ``gccl-invocation-id`` attribute.

    :param args: positional arguments forwarded to the parent ``api_request``.
    :param kwargs: keyword arguments forwarded to the parent ``api_request``;
        an optional ``retry`` entry is consumed here.
    :returns: whatever the (possibly retry-wrapped) parent call returns.
    """
    retry = kwargs.pop("retry", None)
    invocation_id = _helpers._get_invocation_id()
    kwargs["extra_api_info"] = invocation_id
    span_attributes = {
        "gccl-invocation-id": invocation_id,
    }
    call = functools.partial(super(Connection, self).api_request, *args, **kwargs)
    # The span receives the retry object as passed by the caller, i.e.
    # before any conditional policy has been resolved below.
    with create_trace_span(
        name="Storage.Connection.api_request",
        attributes=span_attributes,
        client=self._client,
        api_request=kwargs,
        retry=retry,
    ):
        if retry:
            # If this is a ConditionalRetryPolicy, check conditions.
            try:
                retry = retry.get_retry_policy_if_conditions_met(**kwargs)
            except AttributeError:  # This is not a ConditionalRetryPolicy.
                pass
            # The resolved policy is re-checked because the lookup above may
            # have replaced it (presumably with a falsy value when the
            # conditions are not met -- confirm against ConditionalRetryPolicy).
            if retry:
                call = retry(call)
        return call()
112 changes: 112 additions & 0 deletions google/cloud/storage/_opentelemetry_tracing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Manages OpenTelemetry tracing span creation and handling. This is a PREVIEW FEATURE: Coverage and functionality may change."""

import logging
import os

from contextlib import contextmanager

from google.api_core import exceptions as api_exceptions
from google.api_core import retry as api_retry
from google.cloud.storage import __version__
from google.cloud.storage.retry import ConditionalRetryPolicy


ENABLE_OTEL_TRACES_ENV_VAR = "ENABLE_GCS_PYTHON_CLIENT_OTEL_TRACES"
_DEFAULT_ENABLE_OTEL_TRACES_VALUE = False

# Spellings (compared case-insensitively, after stripping whitespace) that
# switch tracing off.  Any other non-empty value keeps tracing enabled, so
# values that enabled tracing before (e.g. "True", "1", "yes") still do.
_DISABLED_ENV_VALUES = frozenset({"", "0", "false", "no", "off"})


def _env_flag_enabled(raw_value):
    """Interpret an environment-variable value as an on/off switch.

    Environment variables are always strings, and every non-empty string is
    truthy in Python, so using the raw value as a boolean would treat
    ``"False"`` or ``"0"`` as "enabled".  This helper maps those explicit
    "off" spellings to ``False``.

    :param raw_value: the string read from the environment, or the
        non-string default used when the variable is unset.
    :returns: ``True`` if tracing should be enabled, ``False`` otherwise.
    """
    if not isinstance(raw_value, str):
        return bool(raw_value)
    return raw_value.strip().lower() not in _DISABLED_ENV_VALUES


# Evaluated once at import time; later changes to the environment are not
# picked up unless the module is reloaded.
enable_otel_traces = _env_flag_enabled(
    os.environ.get(ENABLE_OTEL_TRACES_ENV_VAR, _DEFAULT_ENABLE_OTEL_TRACES_VALUE)
)
logger = logging.getLogger(__name__)

# OpenTelemetry is an optional dependency: record whether it is importable
# instead of failing at import time.
try:
    from opentelemetry import trace

    HAS_OPENTELEMETRY = True

except ImportError:
    logger.debug(
        "This service is instrumented using OpenTelemetry. "
        "OpenTelemetry or one of its components could not be imported; "
        "please add compatible versions of opentelemetry-api and "
        "opentelemetry-instrumentation packages in order to get Storage "
        "Tracing data."
    )
    HAS_OPENTELEMETRY = False

# Baseline attributes merged into every span's attribute set; per-call
# values are layered on top of a copy of this mapping.
_default_attributes = dict(
    (
        ("rpc.service", "CloudStorage"),
        ("rpc.system", "http"),
        ("user_agent.original", f"gcloud-python/{__version__}"),
    )
)


@contextmanager
def create_trace_span(name, attributes=None, client=None, api_request=None, retry=None):
    """Open a client span named ``name`` and make it the current span.

    When OpenTelemetry could not be imported, or tracing has not been
    enabled, no span is created and ``None`` is yielded instead.

    :param name: name given to the span.
    :param attributes: optional mapping of extra span attributes.
    :param client: optional client passed through to attribute collection.
    :param api_request: optional mapping describing the API request.
    :param retry: optional retry object described in the span attributes.
    :raises google.api_core.exceptions.GoogleAPICallError: re-raised from
        the managed body after the span is marked as errored and the
        exception is recorded on it.
    """
    tracing_active = HAS_OPENTELEMETRY and enable_otel_traces
    if not tracing_active:
        yield None
        return

    otel_tracer = trace.get_tracer(__name__)
    span_attributes = _get_final_attributes(attributes, client, api_request, retry)
    span_manager = otel_tracer.start_as_current_span(
        name=name, kind=trace.SpanKind.CLIENT, attributes=span_attributes
    )
    with span_manager as span:
        try:
            yield span
        except api_exceptions.GoogleAPICallError as error:
            # Flag the span as failed before letting the error propagate.
            span.set_status(trace.Status(trace.StatusCode.ERROR))
            span.record_exception(error)
            raise


def _get_final_attributes(attributes=None, client=None, api_request=None, retry=None):
    """Assemble the attribute mapping for a span.

    Layers are applied in order, later ones overriding earlier ones: the
    module defaults, request-derived attributes, retry-derived attributes,
    and finally the caller-supplied ``attributes``.  Entries whose value is
    ``None`` are dropped from the result.

    :returns: a new dict; the module-level defaults are not modified.
    """
    merged = dict(_default_attributes)

    if api_request:
        merged.update(_set_api_request_attr(api_request, client))

    if isinstance(retry, api_retry.Retry):
        merged.update(_set_retry_attr(retry))

    if isinstance(retry, ConditionalRetryPolicy):
        # A conditional policy wraps a plain retry plus its own predicate.
        merged.update(_set_retry_attr(retry.retry_policy, retry.conditional_predicate))

    if attributes:
        merged.update(attributes)

    return {key: value for key, value in merged.items() if value is not None}


def _set_api_request_attr(request, client):
    """Derive span attributes from an API request description.

    :param request: mapping that may contain ``method``, ``path`` and
        ``timeout`` entries; missing or falsy entries are skipped.
    :param client: object whose ``_connection.API_BASE_URL`` is prefixed to
        the request path.  May be ``None`` (or lack a connection), in which
        case ``url.full`` is omitted rather than raising from inside the
        tracing code and failing the traced call.
    :returns: dict with any of the keys ``http.request.method``,
        ``url.full`` and ``connect_timeout,read_timeout``.
    """
    attr = {}

    method = request.get("method")
    if method:
        attr["http.request.method"] = method

    path = request.get("path")
    if path:
        # Look the base URL up defensively: instrumentation must never be
        # the reason a request fails.
        connection = getattr(client, "_connection", None)
        base_url = getattr(connection, "API_BASE_URL", None)
        if base_url is not None:
            attr["url.full"] = f"{base_url}{path}"

    timeout = request.get("timeout")
    if timeout:
        attr["connect_timeout,read_timeout"] = timeout

    return attr


def _set_retry_attr(retry, conditional_predicate=None):
    """Summarise a retry object's settings as a single span attribute.

    :param retry: object exposing ``_multiplier``, ``_deadline``,
        ``_maximum``, ``_initial`` and ``_predicate``.
    :param conditional_predicate: when truthy, reported in place of
        ``retry._predicate``.
    :returns: a one-entry dict keyed ``"retry"``.
    """
    predicate = conditional_predicate or retry._predicate
    return {
        "retry": f"multiplier{retry._multiplier}/deadline{retry._deadline}/max{retry._maximum}/initial{retry._initial}/predicate{predicate}"
    }
5 changes: 5 additions & 0 deletions google/cloud/storage/acl.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
"""Manage access to objects and buckets."""

from google.cloud.storage._helpers import _add_generation_match_parameters
from google.cloud.storage._opentelemetry_tracing import create_trace_span
from google.cloud.storage.constants import _DEFAULT_TIMEOUT
from google.cloud.storage.retry import DEFAULT_RETRY
from google.cloud.storage.retry import DEFAULT_RETRY_IF_METAGENERATION_SPECIFIED
Expand Down Expand Up @@ -359,6 +360,7 @@ def _require_client(self, client):
client = self.client
return client

@create_trace_span(name="Storage.ACL.reload")
def reload(self, client=None, timeout=_DEFAULT_TIMEOUT, retry=DEFAULT_RETRY):
"""Reload the ACL data from Cloud Storage.

Expand Down Expand Up @@ -484,6 +486,7 @@ def _save(

self.loaded = True

@create_trace_span(name="Storage.ACL.save")
def save(
self,
acl=None,
Expand Down Expand Up @@ -552,6 +555,7 @@ def save(
retry=retry,
)

@create_trace_span(name="Storage.ACL.savePredefined")
def save_predefined(
self,
predefined,
Expand Down Expand Up @@ -617,6 +621,7 @@ def save_predefined(
retry=retry,
)

@create_trace_span(name="Storage.ACL.clear")
def clear(
self,
client=None,
Expand Down
Loading