diff --git a/opentelemetry-api/src/opentelemetry/context/propagation/httptextformat.py b/opentelemetry-api/src/opentelemetry/context/propagation/httptextformat.py index 35bdfbb3fe8..9b6098a9a42 100644 --- a/opentelemetry-api/src/opentelemetry/context/propagation/httptextformat.py +++ b/opentelemetry-api/src/opentelemetry/context/propagation/httptextformat.py @@ -19,8 +19,8 @@ _T = typing.TypeVar("_T") -Setter = typing.Callable[[typing.Type[_T], str, str], None] -Getter = typing.Callable[[typing.Type[_T], str], typing.List[str]] +Setter = typing.Callable[[_T, str, str], None] +Getter = typing.Callable[[_T, str], typing.List[str]] class HTTPTextFormat(abc.ABC): diff --git a/opentelemetry-api/src/opentelemetry/context/propagation/tracecontexthttptextformat.py b/opentelemetry-api/src/opentelemetry/context/propagation/tracecontexthttptextformat.py index 575644a91f2..abe778db953 100644 --- a/opentelemetry-api/src/opentelemetry/context/propagation/tracecontexthttptextformat.py +++ b/opentelemetry-api/src/opentelemetry/context/propagation/tracecontexthttptextformat.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # - +import re import typing import opentelemetry.trace as trace @@ -20,20 +20,143 @@ _T = typing.TypeVar("_T") +# Keys and values are strings of up to 256 printable US-ASCII characters. +# Implementations should conform to the `W3C Trace Context - Tracestate`_ +# spec, which describes additional restrictions on valid field values. +# +# .. 
# .. _W3C Trace Context - Tracestate:
#     https://www.w3.org/TR/trace-context/#tracestate-field


# Building blocks of the regular expression for one ``key=value`` tracestate
# list-member. A key is either a plain key or a ``tenant@vendor`` key.
_KEY_WITHOUT_VENDOR_FORMAT = r"[a-z][_0-9a-z\-\*\/]{0,255}"
_KEY_WITH_VENDOR_FORMAT = (
    r"[a-z][_0-9a-z\-\*\/]{0,240}@[a-z][_0-9a-z\-\*\/]{0,13}"
)

_KEY_FORMAT = _KEY_WITHOUT_VENDOR_FORMAT + "|" + _KEY_WITH_VENDOR_FORMAT
# Printable ASCII except "," and "="; the last character may not be a space.
_VALUE_FORMAT = (
    r"[\x20-\x2b\x2d-\x3c\x3e-\x7e]{0,255}[\x21-\x2b\x2d-\x3c\x3e-\x7e]"
)

# List-members are separated by a comma with optional surrounding blanks.
_DELIMITER_FORMAT = "[ \t]*,[ \t]*"
_MEMBER_FORMAT = "({})(=)({})".format(_KEY_FORMAT, _VALUE_FORMAT)

_DELIMITER_FORMAT_RE = re.compile(_DELIMITER_FORMAT)
_MEMBER_FORMAT_RE = re.compile(_MEMBER_FORMAT)


class TraceContextHTTPTextFormat(httptextformat.HTTPTextFormat):
    """Extracts and injects using w3c TraceContext's headers.

    The span context travels in two headers: ``traceparent`` (version,
    trace id, span id and trace options, all lowercase hex) and
    ``tracestate`` (a comma separated list of ``key=value`` members).
    """

    _TRACEPARENT_HEADER_NAME = "traceparent"
    _TRACESTATE_HEADER_NAME = "tracestate"
    # Groups: 1 version, 2 trace id, 3 span id, 4 trace options,
    # 5 optional trailing data introduced by "-".
    _TRACEPARENT_HEADER_FORMAT = (
        "^[ \t]*([0-9a-f]{2})-([0-9a-f]{32})-([0-9a-f]{16})-([0-9a-f]{2})"
        + "(-.*)?[ \t]*$"
    )
    _TRACEPARENT_HEADER_FORMAT_RE = re.compile(_TRACEPARENT_HEADER_FORMAT)

    @classmethod
    def extract(
        cls, get_from_carrier: httptextformat.Getter[_T], carrier: _T
    ) -> trace.SpanContext:
        """Extracts a valid SpanContext from the carrier.

        Args:
            get_from_carrier: callable that returns the list of values
                stored in the carrier for a given header name.
            carrier: the object holding the incoming headers.

        Returns:
            ``trace.INVALID_SPAN_CONTEXT`` when the traceparent header is
            missing, does not match the expected format, carries an
            all-zero trace id or span id, uses version ``ff``, or is
            version ``00`` followed by trailing data. Otherwise a
            SpanContext built from the first traceparent value. A
            tracestate that cannot be parsed is dropped; it does not
            invalidate the traceparent.
        """
        header = get_from_carrier(carrier, cls._TRACEPARENT_HEADER_NAME)
        if not header:
            return trace.INVALID_SPAN_CONTEXT

        match = cls._TRACEPARENT_HEADER_FORMAT_RE.search(header[0])
        if not match:
            return trace.INVALID_SPAN_CONTEXT

        version, trace_id, span_id, trace_options, residue = match.groups()

        if trace_id == "0" * 32 or span_id == "0" * 16:
            return trace.INVALID_SPAN_CONTEXT

        # Version "ff" is never valid, and version "00" defines no fields
        # after the trace options.
        if version == "ff" or (version == "00" and residue):
            return trace.INVALID_SPAN_CONTEXT

        tracestate_headers = get_from_carrier(
            carrier, cls._TRACESTATE_HEADER_NAME
        )
        tracestate = trace.TraceState()
        try:
            for tracestate_header in tracestate_headers:
                # typing.Dict's update is not recognized by pylint:
                # https://github.com/PyCQA/pylint/issues/2420
                tracestate.update(  # pylint:disable=E1101
                    _parse_tracestate(tracestate_header)
                )
        except ValueError:
            # A broken tracestate must not affect the traceparent: discard
            # the whole tracestate and keep the trace/span ids.
            tracestate = trace.TraceState()

        return trace.SpanContext(
            trace_id=int(trace_id, 16),
            span_id=int(span_id, 16),
            # The field is two hex digits (inject writes it with "{:02x}"),
            # so it has to be decoded as base 16, not passed as a string.
            trace_options=trace.TraceOptions(int(trace_options, 16)),
            trace_state=tracestate,
        )

    @classmethod
    def inject(
        cls,
        context: trace.SpanContext,
        set_in_carrier: httptextformat.Setter[_T],
        carrier: _T,
    ) -> None:
        """Writes the SpanContext into the carrier as w3c headers.

        Nothing is written when ``context`` equals
        ``trace.INVALID_SPAN_CONTEXT``. The tracestate header is only set
        when the context's trace state is non-empty.

        Args:
            context: the SpanContext to propagate.
            set_in_carrier: callable that stores a header value in the
                carrier under a given header name.
            carrier: the object receiving the outgoing headers.
        """
        if context == trace.INVALID_SPAN_CONTEXT:
            return
        traceparent_string = "00-{:032x}-{:016x}-{:02x}".format(
            context.trace_id, context.span_id, context.trace_options
        )
        set_in_carrier(
            carrier, cls._TRACEPARENT_HEADER_NAME, traceparent_string
        )
        if context.trace_state:
            tracestate_string = _format_tracestate(context.trace_state)
            set_in_carrier(
                carrier, cls._TRACESTATE_HEADER_NAME, tracestate_string
            )


def _parse_tracestate(string: str) -> trace.TraceState:
    """Parse a w3c tracestate header into a TraceState.

    Empty and whitespace-only list members are skipped. When a key occurs
    more than once, the last value wins.

    Args:
        string: the value of the tracestate header.

    Returns:
        A valid TraceState that contains values extracted from
        the tracestate header.

    Raises:
        ValueError: if a non-empty list member is not a well-formed
            ``key=value`` pair.
    """
    tracestate = trace.TraceState()
    for raw_member in _DELIMITER_FORMAT_RE.split(string):
        # The delimiter only consumes blanks next to a comma; blanks at the
        # very start or end of the header are removed here.
        member = raw_member.strip(" \t")
        if not member:
            continue
        # fullmatch, so trailing garbage such as "foo=1=2" is rejected
        # instead of being truncated to "foo=1".
        match = _MEMBER_FORMAT_RE.fullmatch(member)
        if not match:
            raise ValueError("illegal key-value format %r" % (member,))
        key, _eq, value = match.groups()
        # typing.Dict's item assignment is not recognized by pylint:
        # https://github.com/PyCQA/pylint/issues/2420
        tracestate[key] = value  # pylint:disable=E1137
    return tracestate


def _format_tracestate(tracestate: trace.TraceState) -> str:
    """Serialize a TraceState into a w3c tracestate header value.

    Args:
        tracestate: the TraceState to write.

    Returns:
        A string that adheres to the w3c tracestate
        header format.
    """
    return ",".join(key + "=" + value for key, value in tracestate.items())


# diff --git a/opentelemetry-api/tests/context/__init__.py b/opentelemetry-api/tests/context/__init__.py
# new file mode 100644
# index 00000000000..e69de29bb2d
# diff --git a/opentelemetry-api/tests/context/propagation/__init__.py b/opentelemetry-api/tests/context/propagation/__init__.py
# new file mode 100644
# index 00000000000..e69de29bb2d
# diff --git a/opentelemetry-api/tests/context/propagation/test_tracecontexthttptextformat.py b/opentelemetry-api/tests/context/propagation/test_tracecontexthttptextformat.py
# new file mode 100644
# index 00000000000..aaf392be248
# --- /dev/null
# +++ b/opentelemetry-api/tests/context/propagation/test_tracecontexthttptextformat.py
# @@ -0,0 +1,215 @@
# Copyright 2019, OpenTelemetry Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import typing
import unittest

from opentelemetry import trace
from opentelemetry.context.propagation import tracecontexthttptextformat

FORMAT = tracecontexthttptextformat.TraceContextHTTPTextFormat()


def get_as_list(
    dict_object: typing.Dict[str, str], key: str
) -> typing.List[str]:
    """Getter for dict carriers: wrap the value stored under ``key`` in a
    list, or return an empty list when the key is absent.
    """
    value = dict_object.get(key)
    return [value] if value is not None else []


class TestTraceContextFormat(unittest.TestCase):
    """Tests for TraceContextHTTPTextFormat extract/inject."""

    TRACE_ID = int("12345678901234567890123456789012", 16)  # type: int
    SPAN_ID = int("1234567890123456", 16)  # type: int

    def test_no_traceparent_header(self):
        """When tracecontext headers are not present, a new SpanContext
        should be created.

        RFC 4.2.2:

        If no traceparent header is received, the vendor creates a new
        trace-id and parent-id that represents the current request.
        """
        output = {}  # type: typing.Dict[str, str]
        span_context = FORMAT.extract(get_as_list, output)
        self.assertIsInstance(span_context, trace.SpanContext)

    def test_from_headers_tracestate_entry_limit(self):
        """If more than 32 entries are passed, allow them.

        We are explicitly choosing not to limit the list members
        as outlined in RFC 3.3.1.1

        RFC 3.3.1.1

        There can be a maximum of 32 list-members in a list.
        """

        span_context = FORMAT.extract(
            get_as_list,
            {
                "traceparent": "00-12345678901234567890123456789012-1234567890123456-00",
                "tracestate": ",".join(
                    [
                        "a00=0,a01=1,a02=2,a03=3,a04=4,a05=5,a06=6,a07=7,a08=8,a09=9",
                        "b00=0,b01=1,b02=2,b03=3,b04=4,b05=5,b06=6,b07=7,b08=8,b09=9",
                        "c00=0,c01=1,c02=2,c03=3,c04=4,c05=5,c06=6,c07=7,c08=8,c09=9",
                        "d00=0,d01=1,d02=2",
                    ]
                ),
            },
        )
        self.assertEqual(len(span_context.trace_state), 33)

    def test_from_headers_tracestate_duplicated_keys(self):
        """If a duplicate tracestate header is present, the most recent entry
        is used.

        RFC 3.3.1.4

        Only one entry per key is allowed because the entry represents that
        last position in the trace. Hence vendors must overwrite their entry
        upon reentry to their tracing system.

        For example, if a vendor name is Congo and a trace started in their
        system and then went through a system named Rojo and later returned
        to Congo, the tracestate value would not be:

            congo=congosFirstPosition,rojo=rojosFirstPosition,congo=congosSecondPosition

        Instead, the entry would be rewritten to only include the most recent
        position:

            congo=congosSecondPosition,rojo=rojosFirstPosition
        """
        span_context = FORMAT.extract(
            get_as_list,
            {
                "traceparent": "00-12345678901234567890123456789012-1234567890123456-00",
                "tracestate": "foo=1,bar=2,foo=3",
            },
        )
        self.assertEqual(span_context.trace_state, {"foo": "3", "bar": "2"})

    def test_headers_with_tracestate(self):
        """When there is a traceparent and tracestate header, data from
        both should be added to the SpanContext.
        """
        traceparent_value = "00-{trace_id}-{span_id}-00".format(
            trace_id=format(self.TRACE_ID, "032x"),
            span_id=format(self.SPAN_ID, "016x"),
        )
        tracestate_value = "foo=1,bar=2,baz=3"
        span_context = FORMAT.extract(
            get_as_list,
            {"traceparent": traceparent_value, "tracestate": tracestate_value},
        )
        self.assertEqual(span_context.trace_id, self.TRACE_ID)
        self.assertEqual(span_context.span_id, self.SPAN_ID)
        self.assertEqual(
            span_context.trace_state, {"foo": "1", "bar": "2", "baz": "3"}
        )

        # Round trip: injecting the extracted context must reproduce the
        # headers (tracestate member order is not asserted).
        output = {}  # type: typing.Dict[str, str]
        FORMAT.inject(span_context, dict.__setitem__, output)
        self.assertEqual(output["traceparent"], traceparent_value)
        for pair in ["foo=1", "bar=2", "baz=3"]:
            self.assertIn(pair, output["tracestate"])
        self.assertEqual(output["tracestate"].count(","), 2)

    def test_invalid_trace_id(self):
        """If the trace id is invalid, we must ignore the full traceparent
        header.

        Also ignore any tracestate.

        RFC 3.2.2.3

        If the trace-id value is invalid (for example if it contains
        non-allowed characters or all zeros), vendors MUST ignore the
        traceparent.

        RFC 3.3

        If the vendor failed to parse traceparent, it MUST NOT attempt to
        parse tracestate. Note that the opposite is not true: failure to
        parse tracestate MUST NOT affect the parsing of traceparent.
        """
        span_context = FORMAT.extract(
            get_as_list,
            {
                "traceparent": "00-00000000000000000000000000000000-1234567890123456-00",
                "tracestate": "foo=1,bar=2,foo=3",
            },
        )
        self.assertEqual(span_context, trace.INVALID_SPAN_CONTEXT)

    def test_invalid_parent_id(self):
        """If the parent id is invalid, we must ignore the full traceparent
        header.

        Also ignore any tracestate.

        RFC 3.2.2.3

        Vendors MUST ignore the traceparent when the parent-id is invalid
        (for example, if it contains non-lowercase hex characters).

        RFC 3.3

        If the vendor failed to parse traceparent, it MUST NOT attempt to
        parse tracestate. Note that the opposite is not true: failure to
        parse tracestate MUST NOT affect the parsing of traceparent.
        """
        # The trace id is valid here so that only the all-zero parent id
        # can be the reason the header is rejected.
        span_context = FORMAT.extract(
            get_as_list,
            {
                "traceparent": "00-12345678901234567890123456789012-0000000000000000-00",
                "tracestate": "foo=1,bar=2,foo=3",
            },
        )
        self.assertEqual(span_context, trace.INVALID_SPAN_CONTEXT)

    def test_no_send_empty_tracestate(self):
        """If the tracestate is empty, do not set the header.

        RFC 3.3.1.1

        Empty and whitespace-only list members are allowed. Vendors MUST
        accept empty tracestate headers but SHOULD avoid sending them.
        """
        output = {}  # type: typing.Dict[str, str]
        FORMAT.inject(
            trace.SpanContext(self.TRACE_ID, self.SPAN_ID),
            dict.__setitem__,
            output,
        )
        self.assertIn("traceparent", output)
        self.assertNotIn("tracestate", output)

    def test_format_not_supported(self):
        """If the traceparent does not adhere to the supported format,
        discard it and create a new tracecontext.

        RFC 4.3

        If the version cannot be parsed, the vendor creates a new traceparent
        header and deletes tracestate.
        """
        span_context = FORMAT.extract(
            get_as_list,
            {
                "traceparent": "00-12345678901234567890123456789012-1234567890123456-00-residue",
                "tracestate": "foo=1,bar=2,foo=3",
            },
        )
        self.assertEqual(span_context, trace.INVALID_SPAN_CONTEXT)

    def test_propagate_invalid_context(self):
        """Do not propagate invalid trace context.
        """
        output = {}  # type: typing.Dict[str, str]
        FORMAT.inject(trace.INVALID_SPAN_CONTEXT, dict.__setitem__, output)
        self.assertNotIn("traceparent", output)