From f69c732a043b347039ec82bf2c2624b013633ab3 Mon Sep 17 00:00:00 2001 From: Victor Martinez Date: Thu, 16 Sep 2021 19:22:44 +0100 Subject: [PATCH] opentelemetry callback: context propagation and error exception (#3378) * opentelemetry callback: context propagation and error exception * Apply suggestions from code review Co-authored-by: Felix Fontein Co-authored-by: Felix Fontein (cherry picked from commit 06345839c6e333f9021d255af5e2707ecbba2c12) --- plugins/callback/opentelemetry.py | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/plugins/callback/opentelemetry.py b/plugins/callback/opentelemetry.py index f256b7263d6..b523603828a 100644 --- a/plugins/callback/opentelemetry.py +++ b/plugins/callback/opentelemetry.py @@ -30,6 +30,13 @@ - The service name resource attribute. env: - name: OTEL_SERVICE_NAME + traceparent: + default: None + type: str + description: + - The L(W3C Trace Context header traceparent,https://www.w3.org/TR/trace-context-1/#traceparent-header). + env: + - name: TRACEPARENT requirements: - opentelemetry-api (python lib) - opentelemetry-exporter-otlp (python lib) @@ -64,9 +71,11 @@ try: from opentelemetry import trace + from opentelemetry.trace import SpanKind from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter from opentelemetry.sdk.resources import SERVICE_NAME, Resource from opentelemetry.trace.status import Status, StatusCode + from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator from opentelemetry.sdk.trace import TracerProvider from opentelemetry.sdk.trace.export import ( ConsoleSpanExporter, @@ -151,6 +160,11 @@ def __init__(self, display): self._display = display + def traceparent_context(self, traceparent): + carrier = dict() + carrier['traceparent'] = traceparent + return TraceContextTextMapPropagator().extract(carrier=carrier) + def start_task(self, tasks_data, hide_task_arguments, play_name, task): """ record the start of a task for one or more hosts """ @@ -188,7 +202,7 @@ def finish_task(self, tasks_data, status, result): task.add_host(HostData(host_uuid, host_name, status, result)) - def generate_distributed_traces(self, otel_service_name, ansible_playbook, tasks_data, status): + def generate_distributed_traces(self, otel_service_name, ansible_playbook, tasks_data, status, traceparent): """ generate distributed traces from the collected TaskData and HostData """ tasks = [] @@ -210,7 +224,8 @@ def generate_distributed_traces(self, otel_service_name, ansible_playbook, tasks tracer = trace.get_tracer(__name__) - with tracer.start_as_current_span(ansible_playbook, start_time=parent_start_time) as parent: + with tracer.start_as_current_span(ansible_playbook, context=self.traceparent_context(traceparent), + start_time=parent_start_time, kind=SpanKind.SERVER) as parent: parent.set_status(status) # Populate trace metadata attributes if self.ansible_version is not None: @@ -244,7 +259,9 @@ def update_span_data(self, task_data, host_data, span): message = res['msg'] else: message = 'failed' - status = Status(status_code=StatusCode.ERROR) + status = Status(status_code=StatusCode.ERROR, description=message) + # Record an exception with the task message + span.record_exception(BaseException(message)) elif host_data.status == 'skipped': if 'skip_reason' in res: message = res['skip_reason'] @@ -291,6 +308,7 @@ def __init__(self, display=None): self.tasks_data = None self.errors = 0 self.disabled = False + self.traceparent = False if OTEL_LIBRARY_IMPORT_ERROR: raise_from( @@ -318,6 +336,9 @@ def set_options(self, task_keys=None, var_options=None, direct=None): if not self.otel_service_name: self.otel_service_name = 'ansible' + # See https://github.com/open-telemetry/opentelemetry-specification/issues/740 + self.traceparent = self.get_option('traceparent') + def v2_playbook_on_start(self, playbook): self.ansible_playbook = basename(playbook._file_name) @@ -394,7 +415,8 @@ def v2_playbook_on_stats(self, stats): self.otel_service_name, self.ansible_playbook, self.tasks_data, - status + status, + self.traceparent ) def v2_runner_on_async_failed(self, result, **kwargs):