Skip to content

Commit

Permalink
feat: trace improvements (#450)
Browse files Browse the repository at this point in the history
  • Loading branch information
daniel-sanche authored Dec 10, 2021
1 parent 898b5fe commit 08d35bb
Show file tree
Hide file tree
Showing 10 changed files with 380 additions and 102 deletions.
117 changes: 81 additions & 36 deletions google/cloud/logging_v2/handlers/_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,13 @@
from google.cloud.logging_v2.handlers.middleware.request import _get_django_request

_DJANGO_CONTENT_LENGTH = "CONTENT_LENGTH"
_DJANGO_TRACE_HEADER = "HTTP_X_CLOUD_TRACE_CONTEXT"
_DJANGO_XCLOUD_TRACE_HEADER = "HTTP_X_CLOUD_TRACE_CONTEXT"
_DJANGO_TRACEPARENT = "HTTP_TRACEPARENT"
_DJANGO_USERAGENT_HEADER = "HTTP_USER_AGENT"
_DJANGO_REMOTE_ADDR_HEADER = "REMOTE_ADDR"
_DJANGO_REFERER_HEADER = "HTTP_REFERER"
_FLASK_TRACE_HEADER = "X_CLOUD_TRACE_CONTEXT"
_FLASK_XCLOUD_TRACE_HEADER = "X_CLOUD_TRACE_CONTEXT"
_FLASK_TRACEPARENT = "TRACEPARENT"
_PROTOCOL_HEADER = "SERVER_PROTOCOL"


Expand Down Expand Up @@ -62,13 +64,12 @@ def get_request_data_from_flask():
"""Get http_request and trace data from flask request headers.
Returns:
Tuple[Optional[dict], Optional[str], Optional[str]]:
Data related to the current http request, trace_id, and span_id for
the request. All fields will be None if a django request isn't
found.
Tuple[Optional[dict], Optional[str], Optional[str], bool]:
Data related to the current http request, trace_id, span_id and trace_sampled
for the request. All fields will be None if a django request isn't found.
"""
if flask is None or not flask.request:
return None, None, None
return None, None, None, False

# build http_request
http_request = {
Expand All @@ -79,25 +80,29 @@ def get_request_data_from_flask():
}

# find trace id and span id
header = flask.request.headers.get(_FLASK_TRACE_HEADER)
trace_id, span_id = _parse_trace_span(header)
# first check for w3c traceparent header
header = flask.request.headers.get(_FLASK_TRACEPARENT)
trace_id, span_id, trace_sampled = _parse_trace_parent(header)
if trace_id is None:
# traceparent not found. look for xcloud_trace_context header
header = flask.request.headers.get(_FLASK_XCLOUD_TRACE_HEADER)
trace_id, span_id, trace_sampled = _parse_xcloud_trace(header)

return http_request, trace_id, span_id
return http_request, trace_id, span_id, trace_sampled


def get_request_data_from_django():
"""Get http_request and trace data from django request headers.
Returns:
Tuple[Optional[dict], Optional[str], Optional[str]]:
Data related to the current http request, trace_id, and span_id for
the request. All fields will be None if a django request isn't
found.
Tuple[Optional[dict], Optional[str], Optional[str], bool]:
Data related to the current http request, trace_id, span_id, and trace_sampled
for the request. All fields will be None if a django request isn't found.
"""
request = _get_django_request()

if request is None:
return None, None, None
return None, None, None, False

# build http_request
http_request = {
Expand All @@ -108,54 +113,94 @@ def get_request_data_from_django():
}

# find trace id and span id
header = request.META.get(_DJANGO_TRACE_HEADER)
trace_id, span_id = _parse_trace_span(header)
# first check for w3c traceparent header
header = request.META.get(_DJANGO_TRACEPARENT)
trace_id, span_id, trace_sampled = _parse_trace_parent(header)
if trace_id is None:
# traceparent not found. look for xcloud_trace_context header
header = request.META.get(_DJANGO_XCLOUD_TRACE_HEADER)
trace_id, span_id, trace_sampled = _parse_xcloud_trace(header)

return http_request, trace_id, span_id
return http_request, trace_id, span_id, trace_sampled


def _parse_trace_span(header):
def _parse_trace_parent(header):
"""Given a w3 traceparent header, extract the trace and span ids.
For more information see https://www.w3.org/TR/trace-context/
Args:
header (str): the string extracted from the traceparent header
example: 00-0af7651916cd43dd8448eb211c80319c-b7ad6b7169203331-01
Returns:
Tuple[Optional[dict], Optional[str], bool]:
The trace_id, span_id and trace_sampled extracted from the header
Each field will be None if header can't be parsed in expected format.
"""
trace_id = span_id = None
trace_sampled = False
# see https://www.w3.org/TR/trace-context/ for W3C traceparent format
if header:
try:
VERSION_PART = r"(?!ff)[a-f\d]{2}"
TRACE_ID_PART = r"(?![0]{32})[a-f\d]{32}"
PARENT_ID_PART = r"(?![0]{16})[a-f\d]{16}"
FLAGS_PART = r"[a-f\d]{2}"
regex = f"^\\s?({VERSION_PART})-({TRACE_ID_PART})-({PARENT_ID_PART})-({FLAGS_PART})(-.*)?\\s?$"
match = re.match(regex, header)
trace_id = match.group(2)
span_id = match.group(3)
# trace-flag component is an 8-bit bit field. Read as an int
int_flag = int(match.group(4), 16)
# trace sampled is set if the right-most bit in flag component is set
trace_sampled = bool(int_flag & 1)
except (IndexError, AttributeError):
# could not parse header as expected. Return None
pass
return trace_id, span_id, trace_sampled


def _parse_xcloud_trace(header):
"""Given an X_CLOUD_TRACE header, extract the trace and span ids.
Args:
header (str): the string extracted from the X_CLOUD_TRACE header
Returns:
Tuple[Optional[dict], Optional[str]]:
The trace_id and span_id extracted from the header
Tuple[Optional[dict], Optional[str], bool]:
The trace_id, span_id and trace_sampled extracted from the header
Each field will be None if not found.
"""
trace_id = None
span_id = None
trace_id = span_id = None
trace_sampled = False
# see https://cloud.google.com/trace/docs/setup for X-Cloud-Trace_Context format
if header:
try:
split_header = header.split("/", 1)
trace_id = split_header[0]
header_suffix = split_header[1]
# the span is the set of alphanumeric characters after the /
span_id = re.findall(r"^\w+", header_suffix)[0]
regex = r"([\w-]+)?(\/?([\w-]+))?(;?o=(\d))?"
match = re.match(regex, header)
trace_id = match.group(1)
span_id = match.group(3)
trace_sampled = match.group(5) == "1"
except IndexError:
pass
return trace_id, span_id
return trace_id, span_id, trace_sampled


def get_request_data():
"""Helper to get http_request and trace data from supported web
frameworks (currently supported: Flask and Django).
Returns:
Tuple[Optional[dict], Optional[str], Optional[str]]:
Data related to the current http request, trace_id, and span_id for
the request. All fields will be None if a django request isn't
found.
Tuple[Optional[dict], Optional[str], Optional[str], bool]:
Data related to the current http request, trace_id, span_id, and trace_sampled
for the request. All fields will be None if a http request isn't found.
"""
checkers = (
get_request_data_from_django,
get_request_data_from_flask,
)

for checker in checkers:
http_request, trace_id, span_id = checker()
http_request, trace_id, span_id, trace_sampled = checker()
if http_request is not None:
return http_request, trace_id, span_id
return http_request, trace_id, span_id, trace_sampled

return None, None, None
return None, None, None, False
4 changes: 2 additions & 2 deletions google/cloud/logging_v2/handlers/app_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def get_gae_labels(self):
"""
gae_labels = {}

_, trace_id, _ = get_request_data()
_, trace_id, _, _ = get_request_data()
if trace_id is not None:
gae_labels[_TRACE_ID_LABEL] = trace_id

Expand All @@ -115,7 +115,7 @@ def emit(self, record):
record (logging.LogRecord): The record to be logged.
"""
message = super(AppEngineHandler, self).format(record)
inferred_http, inferred_trace, _ = get_request_data()
inferred_http, inferred_trace, _, _ = get_request_data()
if inferred_trace is not None:
inferred_trace = f"projects/{self.project_id}/traces/{inferred_trace}"
# allow user overrides
Expand Down
10 changes: 9 additions & 1 deletion google/cloud/logging_v2/handlers/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,14 +82,20 @@ def filter(self, record):
"""
user_labels = getattr(record, "labels", {})
# infer request data from the environment
inferred_http, inferred_trace, inferred_span = get_request_data()
(
inferred_http,
inferred_trace,
inferred_span,
inferred_sampled,
) = get_request_data()
if inferred_trace is not None and self.project is not None:
# add full path for detected trace
inferred_trace = f"projects/{self.project}/traces/{inferred_trace}"
# set new record values
record._resource = getattr(record, "resource", None)
record._trace = getattr(record, "trace", inferred_trace) or None
record._span_id = getattr(record, "span_id", inferred_span) or None
record._trace_sampled = bool(getattr(record, "trace_sampled", inferred_sampled))
record._http_request = getattr(record, "http_request", inferred_http)
record._source_location = CloudLoggingFilter._infer_source_location(record)
# add logger name as a label if possible
Expand All @@ -98,6 +104,7 @@ def filter(self, record):
# create string representations for structured logging
record._trace_str = record._trace or ""
record._span_id_str = record._span_id or ""
record._trace_sampled_str = "true" if record._trace_sampled else "false"
record._http_request_str = json.dumps(
record._http_request or {}, ensure_ascii=False
)
Expand Down Expand Up @@ -205,6 +212,7 @@ def emit(self, record):
labels=labels,
trace=record._trace,
span_id=record._span_id,
trace_sampled=record._trace_sampled,
http_request=record._http_request,
source_location=record._source_location,
)
Expand Down
1 change: 1 addition & 0 deletions google/cloud/logging_v2/handlers/structured_log.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
'"logging.googleapis.com/labels": %(_labels_str)s, '
'"logging.googleapis.com/trace": "%(_trace_str)s", '
'"logging.googleapis.com/spanId": "%(_span_id_str)s", '
'"logging.googleapis.com/trace_sampled": %(_trace_sampled_str)s, '
'"logging.googleapis.com/sourceLocation": %(_source_location_str)s, '
'"httpRequest": %(_http_request_str)s '
"}"
Expand Down
2 changes: 1 addition & 1 deletion tests/environment
4 changes: 4 additions & 0 deletions tests/system/test_system.py
Original file line number Diff line number Diff line change
Expand Up @@ -454,6 +454,7 @@ def test_log_empty(self):

self.assertEqual(len(entries), 1)
self.assertIsNone(entries[0].payload)
self.assertFalse(entries[0].trace_sampled)

def test_log_struct_logentry_data(self):
logger = Config.CLIENT.logger(self._logger_name("log_w_struct"))
Expand All @@ -473,6 +474,7 @@ def test_log_struct_logentry_data(self):
self.assertEqual(entries[0].severity, "WARNING")
self.assertEqual(entries[0].trace, JSON_PAYLOAD["trace"])
self.assertEqual(entries[0].span_id, JSON_PAYLOAD["span_id"])
self.assertFalse(entries[0].trace_sampled)

def test_log_handler_async(self):
LOG_MESSAGE = "It was the worst of times"
Expand Down Expand Up @@ -534,6 +536,7 @@ def test_handlers_w_extras(self):
extra = {
"trace": "123",
"span_id": "456",
"trace_sampled": True,
"http_request": expected_request,
"source_location": expected_source,
"resource": Resource(type="cloudiot_device", labels={}),
Expand All @@ -545,6 +548,7 @@ def test_handlers_w_extras(self):
self.assertEqual(len(entries), 1)
self.assertEqual(entries[0].trace, extra["trace"])
self.assertEqual(entries[0].span_id, extra["span_id"])
self.assertTrue(entries[0].trace_sampled)
self.assertEqual(entries[0].http_request, expected_request)
self.assertEqual(
entries[0].labels, {**extra["labels"], "python_logger": LOGGER_NAME}
Expand Down
Loading

0 comments on commit 08d35bb

Please sign in to comment.