Skip to content

Commit

Permalink
chore(symdb): upload compressed symbol payloads (#11404)
Browse files Browse the repository at this point in the history
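We add support for uploading gzip-compressed symbol database payloads: the JSON export is compressed with `gzip` and sent as the `file` part of the multipart upload.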

## Checklist
- [x] PR author has checked that all the criteria below are met
- The PR description includes an overview of the change
- The PR description articulates the motivation for the change
- The change includes tests OR the PR description describes a testing
strategy
- The PR description notes risks associated with the change, if any
- Newly-added code is easy to change
- The change follows the [library release note
guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html)
- The change includes or references documentation updates if necessary
- Backport labels are set (if
[applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting))

## Reviewer Checklist
- [x] Reviewer has checked that all the criteria below are met 
- Title is accurate
- All changes are related to the pull request's stated goal
- Avoids breaking
[API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces)
changes
- Testing strategy adequately addresses listed risks
- Newly-added code is easy to change
- Release note makes sense to a user of the library
- If necessary, author has acknowledged and discussed the performance
implications of this PR as reported in the benchmarks PR comment
- Backport labels are set in a manner that is consistent with the
[release branch maintenance
policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)
  • Loading branch information
P403n1x87 authored Dec 11, 2024
1 parent 4594170 commit aec08be
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 8 deletions.
14 changes: 10 additions & 4 deletions ddtrace/internal/symbol_db/symbols.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from dataclasses import field
import dis
from enum import Enum
import gzip
from http.client import HTTPResponse
from inspect import CO_VARARGS
from inspect import CO_VARKEYWORDS
Expand Down Expand Up @@ -484,13 +485,18 @@ def upload(self) -> HTTPResponse:
),
FormData(
name="file",
filename="symdb_export.json",
data=json.dumps(self.to_json()),
content_type="json",
filename=f"symbols_{os.getpid()}.json.gz",
data="[symbols_placeholder]",
content_type="gzip",
),
]
)

# DEV: The as_bytes method ends up writing the data line by line, which
# breaks the final payload. We add a placeholder instead and manually
# replace it with the compressed JSON.
body = body.replace(b"[symbols_placeholder]", gzip.compress(json.dumps(self.to_json()).encode("utf-8")))

with connector(get_trace_url(), timeout=5.0)() as conn:
log.debug("[PID %d] SymDB: Uploading symbols payload", os.getpid())
conn.request("POST", "/symdb/v1/input", body, headers)
Expand Down Expand Up @@ -527,7 +533,7 @@ def is_module_included(module: ModuleType) -> bool:


class SymbolDatabaseUploader(BaseModuleWatchdog):
__scope_limit__ = 100
__scope_limit__ = 400

def __init__(self) -> None:
super().__init__()
Expand Down
9 changes: 5 additions & 4 deletions ddtrace/internal/utils/http.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from contextlib import contextmanager
from dataclasses import dataclass
from email.encoders import encode_noop
from json import loads
import logging
import os
Expand Down Expand Up @@ -418,7 +419,7 @@ def parse_message(msg):
class FormData:
name: str
filename: str
data: str
data: Union[str, bytes]
content_type: str


Expand All @@ -431,12 +432,12 @@ def multipart(parts: List[FormData]) -> Tuple[bytes, dict]:
del msg["MIME-Version"]

for part in parts:
app = MIMEApplication(part.data, part.content_type, lambda _: _)
app = MIMEApplication(part.data, part.content_type, encode_noop)
app.add_header("Content-Disposition", "form-data", name=part.name, filename=part.filename)
del app["MIME-Version"]
msg.attach(app)

# Split headers and body
headers, _, body = msg.as_string(policy=HTTP).partition("\r\n\r\n")
headers, _, body = msg.as_bytes(policy=HTTP).partition(b"\r\n\r\n")

return body.encode("utf-8"), dict(_.split(": ") for _ in headers.splitlines())
return body, dict(_.split(": ") for _ in headers.decode().splitlines())

0 comments on commit aec08be

Please sign in to comment.