Skip to content

Commit

Permalink
Refactor httpx content type handling (#720)
Browse files (browse the repository at this point in the history)
  • Loading branch information
alexmojaki authored Dec 23, 2024
1 parent 4963b14 commit 9933ce3
Show file tree
Hide file tree
Showing 2 changed files with 90 additions and 15 deletions.
77 changes: 63 additions & 14 deletions logfire/_internal/integrations/httpx.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@

import inspect
from contextlib import suppress
from email.message import Message
from email.headerregistry import ContentTypeHeader
from email.policy import EmailPolicy
from functools import lru_cache
from typing import TYPE_CHECKING, Any, Callable, Literal, Mapping, cast

import httpx
Expand Down Expand Up @@ -230,8 +232,7 @@ async def new_hook(span: Span, request: RequestInfo, response: ResponseInfo) ->


def capture_response_json(logfire_instance: Logfire, response_info: ResponseInfo, is_async: bool) -> None:
headers = response_info.headers
if not headers.get('content-type', '').lower().startswith('application/json'):
if not is_json_type(response_info.headers.get('content-type', '')):
return

frame = inspect.currentframe().f_back.f_back # type: ignore
Expand Down Expand Up @@ -311,14 +312,7 @@ def capture_headers(span: Span, headers: httpx.Headers, request_or_response: Lit
)


def get_charset(content_type: str) -> str:
    """Return the ``charset`` parameter of a Content-Type value.

    The value is stored on a throwaway ``email.message.Message`` so that the
    standard library does the parameter parsing. ``'utf-8'`` is returned when
    no ``charset`` parameter is present.
    """
    message = Message()
    message['content-type'] = content_type
    charset = message.get_param('charset', 'utf-8')
    return cast(str, charset)


def decode_body(body: bytes, content_type: str):
charset = get_charset(content_type)
def decode_body(body: bytes, charset: str):
with suppress(UnicodeDecodeError, LookupError):
return body.decode(charset)
if charset.lower() not in ('utf-8', 'utf8'):
Expand All @@ -328,13 +322,16 @@ def decode_body(body: bytes, content_type: str):


def capture_request_body(span: Span, request: RequestInfo) -> None:
content_type = request.headers.get('content-type', '').lower()
if not isinstance(request.stream, httpx.ByteStream):
return
if not content_type.startswith('application/json'):

content_type_string = request.headers.get('content-type', '')
if not is_json_type(content_type_string):
return

body = decode_body(list(request.stream)[0], content_type)
content_type_header = content_type_header_from_string(content_type_string)
charset = content_type_header.params.get('charset', 'utf-8')
body = decode_body(list(request.stream)[0], charset)

attr_name = 'http.request.body.json'
set_user_attributes_on_raw_span(span, {attr_name: {}}) # type: ignore
Expand Down Expand Up @@ -370,3 +367,55 @@ def capture_request_form_data(span: Span, request: RequestInfo) -> None:
return
span = cast(opentelemetry.sdk.trace.Span, span)
set_user_attributes_on_raw_span(span, {'http.request.body.form': data})


@lru_cache
def content_type_header_from_string(content_type: str) -> ContentTypeHeader:
    """Parse a raw Content-Type header value into a structured header object.

    The returned header exposes the parsed ``maintype``, ``subtype`` and
    ``params``. Results are memoized with ``lru_cache``, so repeated calls
    with the same string return the same header object.
    """
    build_header = EmailPolicy.header_factory
    return build_header('content-type', content_type)


def content_type_subtypes(subtype: str) -> set[str]:
    """Split a media subtype into its ``+``-separated components.

    A single leading ``x-`` prefix is dropped before splitting, so
    ``'x-json+ld'`` yields ``{'json', 'ld'}``.
    """
    prefix = 'x-'
    name = subtype[len(prefix) :] if subtype.startswith(prefix) else subtype
    return set(name.split('+'))


@lru_cache
def is_json_type(content_type: str) -> bool:
    """Tell whether a Content-Type value denotes a JSON payload.

    True only when the main type is ``application`` and ``json`` is one of
    the ``+``-separated components of the subtype (after any ``x-`` prefix
    has been removed by ``content_type_subtypes``).
    """
    parsed = content_type_header_from_string(content_type)
    if parsed.maintype != 'application':
        return False
    return 'json' in content_type_subtypes(parsed.subtype)


# Subtype components that mark an `application/...` content type as textual.
# Compared against the `+`-separated parts of the subtype by `is_text_type`.
TEXT_SUBTYPES = {
    # JSON and JSON-like formats
    'json',
    'jsonp',
    'json-p',
    'jsonl',
    'json-l',
    'jsonlines',
    'json-lines',
    'ndjson',
    'nd-json',
    'json5',
    'json-5',
    # Scripts
    'javascript',
    # Markup
    'xml',
    'xhtml',
    'html',
    # Delimited tables
    'csv',
    'tsv',
    # Configuration formats
    'yaml',
    'yml',
    'toml',
}


@lru_cache
def is_text_type(content_type: str) -> bool:
    """Tell whether a Content-Type value denotes textual content.

    Every ``text/*`` type counts as text. An ``application/*`` type counts
    as text when at least one ``+``-separated component of its subtype is
    listed in ``TEXT_SUBTYPES``. Any other main type is not text.
    """
    parsed = content_type_header_from_string(content_type)
    main_type = parsed.maintype
    if main_type == 'text':
        return True
    if main_type == 'application':
        components = content_type_subtypes(parsed.subtype)
        return not components.isdisjoint(TEXT_SUBTYPES)
    return False
28 changes: 27 additions & 1 deletion tests/otel_integrations/test_httpx.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

import logfire
import logfire._internal.integrations.httpx
from logfire._internal.integrations.httpx import CODES_FOR_METHODS_WITH_DATA_PARAM
from logfire._internal.integrations.httpx import CODES_FOR_METHODS_WITH_DATA_PARAM, is_json_type, is_text_type
from logfire.testing import TestExporter

pytestmark = pytest.mark.anyio
Expand Down Expand Up @@ -596,3 +596,29 @@ def test_httpx_client_capture_request_form_data(exporter: TestExporter):
}
]
)


def test_is_json_type():
    """Check `is_json_type` against content types that should and should not match."""
    json_types = (
        'application/json',
        ' APPLICATION / JSON ',
        'application/json; charset=utf-8',
        'application/json; charset=potato; foo=bar',
        'application/json+ld',
        'application/x-json+ld',
        'application/ld+xml+json',
    )
    non_json_types = (
        'json',
        'json/application',
        'text/json',
        'other/json',
        '',
        'application/json-x',
        'application//json',
    )

    for content_type in json_types:
        assert is_json_type(content_type)
    for content_type in non_json_types:
        assert not is_json_type(content_type)


def test_is_text_type():
    """Check `is_text_type` against content types that should and should not match."""
    text_types = (
        'text/foo',
        'application/json',
        'application/xml',
        'application/foo+xml',
    )
    non_text_types = (
        'application/text',
        'foo/text',
    )

    for content_type in text_types:
        assert is_text_type(content_type)
    for content_type in non_text_types:
        assert not is_text_type(content_type)

0 comments on commit 9933ce3

Please sign in to comment.