diff --git a/.gitlab/package.yml b/.gitlab/package.yml index d4f79df6dd8..c8091f47984 100644 --- a/.gitlab/package.yml +++ b/.gitlab/package.yml @@ -21,7 +21,7 @@ build_base_venvs: paths: - .riot/venv_* - ddtrace/**/*.so* - - ddtrace/internal/datadog/profiling/crashtracker/crashtracker_exe + - ddtrace/internal/datadog/profiling/crashtracker/crashtracker_exe* download_ddtrace_artifacts: image: registry.ddbuild.io/github-cli:v27480869-eafb11d-2.43.0 diff --git a/.gitlab/prepare-oci-package.sh b/.gitlab/prepare-oci-package.sh index 4f7579e9998..27d5354219e 100755 --- a/.gitlab/prepare-oci-package.sh +++ b/.gitlab/prepare-oci-package.sh @@ -25,6 +25,16 @@ BUILD_DIR=sources echo -n "$PYTHON_PACKAGE_VERSION" > sources/version +echo "Cleaning up binaries for ${ARCH}" +if [ "${ARCH}" == "arm64" ]; then + echo "Removing x86_64 binaries" + find ../pywheels-dep/ -type f -name '*x86_64*' -exec rm -f {} \; +elif [ "${ARCH}" == "amd64" ]; then + echo "Removing aarch64 binaries" + find ../pywheels-dep/ -type f -name '*aarch64*' -exec rm -f {} \; +else + echo "No ARCH set, not removing any binaries" +fi cp -r ../pywheels-dep/site-packages* sources/ddtrace_pkgs cp ../lib-injection/sources/* sources/ diff --git a/ddtrace/contrib/internal/celery/utils.py b/ddtrace/contrib/internal/celery/utils.py index 80a4046f191..e18f83ba0dc 100644 --- a/ddtrace/contrib/internal/celery/utils.py +++ b/ddtrace/contrib/internal/celery/utils.py @@ -128,9 +128,12 @@ def retrieve_task_id(context): """ headers = context.get("headers") body = context.get("body") - if headers: + # Check if the id is in the headers, then check the body for it. + # If we don't check the id first, we could wrongly assume no task_id + # when the task_id is in the body. + if headers and "id" in headers: # Protocol Version 2 (default from Celery 4.0) return headers.get("id") - else: + elif body and "id" in body: # Protocol Version 1 return body.get("id") diff --git a/ddtrace/internal/datadog/profiling/crashtracker/CMakeLists.txt b/ddtrace/internal/datadog/profiling/crashtracker/CMakeLists.txt index 792369c642c..5b7d25aa4c3 100644 --- a/ddtrace/internal/datadog/profiling/crashtracker/CMakeLists.txt +++ b/ddtrace/internal/datadog/profiling/crashtracker/CMakeLists.txt @@ -102,7 +102,16 @@ target_include_directories(crashtracker_exe PRIVATE .. ${Datadog_INCLUDE_DIRS} ) -set_target_properties(crashtracker_exe PROPERTIES INSTALL_RPATH "$ORIGIN/..") + +# The CRASHTRACKER_EXE_TARGET_NAME should have been set by dd_wrapper +if (NOT CRASHTRACKER_EXE_TARGET_NAME) + message(FATAL_ERROR "CRASHTRACKER_EXE_TARGET_NAME not set") +endif() + +set_target_properties(crashtracker_exe PROPERTIES + INSTALL_RPATH "$ORIGIN/.." + OUTPUT_NAME ${CRASHTRACKER_EXE_TARGET_NAME} +) target_link_libraries(crashtracker_exe PRIVATE dd_wrapper ) diff --git a/ddtrace/internal/datadog/profiling/crashtracker/_crashtracker.pyx b/ddtrace/internal/datadog/profiling/crashtracker/_crashtracker.pyx index c7bb62da8be..62deb0933c8 100644 --- a/ddtrace/internal/datadog/profiling/crashtracker/_crashtracker.pyx +++ b/ddtrace/internal/datadog/profiling/crashtracker/_crashtracker.pyx @@ -17,6 +17,7 @@ cdef extern from "" namespace "std" nogil: # For now, the crashtracker code is bundled in the libdatadog Profiling FFI. # This is primarily to reduce binary size. cdef extern from "crashtracker_interface.hpp": + const char *crashtracker_get_exe_name() void crashtracker_set_url(string_view url) void crashtracker_set_service(string_view service) void crashtracker_set_env(string_view env) @@ -150,9 +151,10 @@ def set_tag(key: StringType, value: StringType) -> None: def start() -> bool: - # The file is "crashtracker_exe" in the same directory as the libdd_wrapper.so + crashtracker_filename_raw = crashtracker_get_exe_name() + crashtracker_filename = crashtracker_filename_raw.decode("utf-8") exe_dir = os.path.dirname(__file__) - crashtracker_path = os.path.join(exe_dir, "crashtracker_exe") + crashtracker_path = os.path.join(exe_dir, crashtracker_filename) crashtracker_path_bytes = ensure_binary_or_empty(crashtracker_path) bin_exists = crashtracker_set_receiver_binary_path( string_view(crashtracker_path_bytes, len(crashtracker_path_bytes)) diff --git a/ddtrace/internal/datadog/profiling/dd_wrapper/CMakeLists.txt b/ddtrace/internal/datadog/profiling/dd_wrapper/CMakeLists.txt index 2a17a0948f5..81a031abb80 100644 --- a/ddtrace/internal/datadog/profiling/dd_wrapper/CMakeLists.txt +++ b/ddtrace/internal/datadog/profiling/dd_wrapper/CMakeLists.txt @@ -18,6 +18,7 @@ include(AnalysisFunc) include(FindClangtidy) include(FindCppcheck) include(FindInfer) +include(CheckSymbolExists) # Library sources add_library(dd_wrapper SHARED @@ -42,10 +43,53 @@ target_include_directories(dd_wrapper PRIVATE include ${Datadog_INCLUDE_DIRS} ) + target_link_libraries(dd_wrapper PRIVATE ${Datadog_LIBRARIES} ) -set_target_properties(dd_wrapper PROPERTIES POSITION_INDEPENDENT_CODE ON) + +# Figure out the suffix. Try to approximate the cpython way of doing things. +## C library +check_symbol_exists(__GLIBC__ "features.h" HAVE_GLIBC) +check_symbol_exists(__MUSL__ "features.h" HAVE_MUSL) + +set(PLATFORM_LIBC "unknown") +if (HAVE_GLIBC) + set(PLATFORM_LIBC "glibc") +elseif (HAVE_MUSL) + set(PLATFORM_LIBC "musl") +endif() + +## Processor +set(PLATFORM_PROCESSOR "${CMAKE_SYSTEM_PROCESSOR}") + +## Put the suffix together +set(PLATFORM_SUFFIX "${PLATFORM_LIBC}-${PLATFORM_PROCESSOR}") +string(TOLOWER ${PLATFORM_SUFFIX} PLATFORM_SUFFIX) + + +# target output name should combine the system name and the processor +# this won't necessarily match the cpython way 1-1, but as long as it encodes the major moving parts, it's fine +set(DD_WRAPPER_TARGET_NAME "dd_wrapper-${PLATFORM_SUFFIX}") + +set_target_properties(dd_wrapper PROPERTIES + POSITION_INDEPENDENT_CODE ON + OUTPUT_NAME "${DD_WRAPPER_TARGET_NAME}" +) + +# The name of the crashtracker executable has to be propagated to a a few different targets, and it needs to be +# inferred at runtime, so we set it here. Any component which used dd_wrapper will have access to a uniform name. +set(CRASHTRACKER_EXE_BASE_NAME "crashtracker_exe") +set(CRASHTRACKER_EXE_TARGET_NAME ${CRASHTRACKER_EXE_BASE_NAME}-${PLATFORM_SUFFIX}) + +target_compile_definitions(dd_wrapper PRIVATE + CRASHTRACKER_EXE_TARGET_NAME="${CRASHTRACKER_EXE_TARGET_NAME}" +) + +# Also propagate the target name to the parent scope, since it can be used for non-code purposes +# (e.g., filenames) +set(CRASHTRACKER_EXE_TARGET_NAME ${CRASHTRACKER_EXE_TARGET_NAME} PARENT_SCOPE) + # For a regular build, the LIB_INSTALL_DIR represents the final location of the library, so nothing special is needed. # However, for an inplace build, setup.py will pass a temporary path as the extension output directory, so while it diff --git a/ddtrace/internal/datadog/profiling/dd_wrapper/include/crashtracker_interface.hpp b/ddtrace/internal/datadog/profiling/dd_wrapper/include/crashtracker_interface.hpp index 1cb5c294df0..d4026e26712 100644 --- a/ddtrace/internal/datadog/profiling/dd_wrapper/include/crashtracker_interface.hpp +++ b/ddtrace/internal/datadog/profiling/dd_wrapper/include/crashtracker_interface.hpp @@ -8,6 +8,7 @@ extern "C" { #endif + const char* crashtracker_get_exe_name(); void crashtracker_set_url(std::string_view url); void crashtracker_set_service(std::string_view service); void crashtracker_set_env(std::string_view env); diff --git a/ddtrace/internal/datadog/profiling/dd_wrapper/src/crashtracker_interface.cpp b/ddtrace/internal/datadog/profiling/dd_wrapper/src/crashtracker_interface.cpp index bf3257225fd..e1dcbb9ecd6 100644 --- a/ddtrace/internal/datadog/profiling/dd_wrapper/src/crashtracker_interface.cpp +++ b/ddtrace/internal/datadog/profiling/dd_wrapper/src/crashtracker_interface.cpp @@ -6,10 +6,21 @@ #include #include +// If the crashtracker exe target name is not set, then fail +#ifndef CRASHTRACKER_EXE_TARGET_NAME +#error "CRASHTRACKER_EXE_TARGET_NAME must be defined" +#endif + // A global instance of the crashtracker is created here. Datadog::Crashtracker crashtracker; bool crashtracker_initialized = false; +const char* +crashtracker_get_exe_name() // cppcheck-suppress unusedFunction +{ + return CRASHTRACKER_EXE_TARGET_NAME; +} + void crashtracker_postfork_child() { diff --git a/releasenotes/notes/fix-protocol-check-62ad7d096b1f36a9.yaml b/releasenotes/notes/fix-protocol-check-62ad7d096b1f36a9.yaml new file mode 100644 index 00000000000..e6dba7a3923 --- /dev/null +++ b/releasenotes/notes/fix-protocol-check-62ad7d096b1f36a9.yaml @@ -0,0 +1,4 @@ +--- +fixes: + - | + tracing(celery): Fixes an issue where ``celery.apply`` spans using task_protocol 1 didn't close by improving the check for the task id in the body. \ No newline at end of file diff --git a/releasenotes/notes/profiling-fix-extension-filenames-ac311c1851a7c467.yaml b/releasenotes/notes/profiling-fix-extension-filenames-ac311c1851a7c467.yaml new file mode 100644 index 00000000000..6b1a32c6717 --- /dev/null +++ b/releasenotes/notes/profiling-fix-extension-filenames-ac311c1851a7c467.yaml @@ -0,0 +1,4 @@ +fixes: + - | + Profiling: all files with platform-dependent code have had their filenames updated to reflect the platform they are + for. This fixes issues where the wrong file would be used on a given platform. diff --git a/setup.py b/setup.py index afb57338c25..196a2c452ca 100644 --- a/setup.py +++ b/setup.py @@ -367,7 +367,7 @@ def build_extension_cmake(self, ext): # If this is an inplace build, propagate this fact to CMake in case it's helpful # In particular, this is needed for build products which are not otherwise managed - # by setuptools/distutils, such libdd_wrapper.so + # by setuptools/distutils if IS_EDITABLE: # the INPLACE_LIB_INSTALL_DIR should be the source dir of the extension cmake_args.append("-DINPLACE_LIB_INSTALL_DIR={}".format(ext.source_dir)) @@ -580,8 +580,8 @@ def get_exts_for(name): "ddtrace.appsec": ["rules.json"], "ddtrace.appsec._ddwaf": ["libddwaf/*/lib/libddwaf.*"], "ddtrace.appsec._iast._taint_tracking": ["CMakeLists.txt"], - "ddtrace.internal.datadog.profiling": ["libdd_wrapper.*"], - "ddtrace.internal.datadog.profiling.crashtracker": ["crashtracker_exe"], + "ddtrace.internal.datadog.profiling": ["libdd_wrapper*.*"], + "ddtrace.internal.datadog.profiling.crashtracker": ["crashtracker_exe*"], }, zip_safe=False, # enum34 is an enum backport for earlier versions of python diff --git a/tests/contrib/celery/test_utils.py b/tests/contrib/celery/test_utils.py index 92a9405c99b..3bbeb959ce1 100644 --- a/tests/contrib/celery/test_utils.py +++ b/tests/contrib/celery/test_utils.py @@ -131,9 +131,11 @@ def test_tags_from_context_empty_keys(): assert {} == tags -def test_task_id_from_protocol_v1(): +def test_task_id_from_protocol_v1_no_headers(): # ensures a `task_id` is properly returned when Protocol v1 is used. # `context` is an example of an emitted Signal with Protocol v1 + # this test assumes the headers are blank + test_id = "dffcaec1-dd92-4a1a-b3ab-d6512f4beeb7" context = { "body": { "expires": None, @@ -143,7 +145,7 @@ def test_task_id_from_protocol_v1(): "callbacks": None, "errbacks": None, "taskset": None, - "id": "dffcaec1-dd92-4a1a-b3ab-d6512f4beeb7", + "id": test_id, "retries": 0, "task": "tests.contrib.celery.test_integration.fn_task_parameters", "timelimit": (None, None), @@ -159,12 +161,87 @@ def test_task_id_from_protocol_v1(): } task_id = retrieve_task_id(context) - assert task_id == "dffcaec1-dd92-4a1a-b3ab-d6512f4beeb7" + assert task_id == test_id -def test_task_id_from_protocol_v2(): +def test_task_id_from_protocol_v1_with_headers(): + # ensures a `task_id` is properly returned when Protocol v1 is used with headers. + # `context` is an example of an emitted Signal with Protocol v1 + # this tests ensures that the headers have other information + test_id = "dffcaec1-dd92-4a1a-b3ab-d6512f4beeb7" + context = { + "body": { + "expires": None, + "utc": True, + "args": ["user"], + "chord": None, + "callbacks": None, + "errbacks": None, + "taskset": None, + "id": test_id, + "retries": 0, + "task": "tests.contrib.celery.test_integration.fn_task_parameters", + "timelimit": (None, None), + "eta": None, + "kwargs": {"force_logout": True}, + }, + "sender": "tests.contrib.celery.test_integration.fn_task_parameters", + "exchange": "celery", + "routing_key": "celery", + "retry_policy": None, + "headers": { + "header1": "value", + "header2": "value", + }, + "properties": {}, + } + + task_id = retrieve_task_id(context) + assert task_id == test_id + + +def test_task_id_from_protocol_v2_no_body(): # ensures a `task_id` is properly returned when Protocol v2 is used. # `context` is an example of an emitted Signal with Protocol v2 + # this tests assumes the body has no data + test_id = "7e917b83-4018-431d-9832-73a28e1fb6c0" + context = { + "body": {}, + "sender": "tests.contrib.celery.test_integration.fn_task_parameters", + "exchange": "", + "routing_key": "celery", + "retry_policy": None, + "headers": { + "origin": "gen83744@hostname", + "root_id": test_id, + "expires": None, + "shadow": None, + "id": test_id, + "kwargsrepr": "{'force_logout': True}", + "lang": "py", + "retries": 0, + "task": "tests.contrib.celery.test_integration.fn_task_parameters", + "group": None, + "timelimit": [None, None], + "parent_id": None, + "argsrepr": "['user']", + "eta": None, + }, + "properties": { + "reply_to": "c3054a07-5b28-3855-b18c-1623a24aaeca", + "correlation_id": test_id, + }, + } + + task_id = retrieve_task_id(context) + assert task_id == test_id + + +def test_task_id_from_protocol_v2_with_body(): + # ensures a `task_id` is properly returned when Protocol v2 is used. + # `context` is an example of an emitted Signal with Protocol v2 + # this tests assumes the body has data + test_id = "7e917b83-4018-431d-9832-73a28e1fb6c0" context = { "body": ( ["user"], @@ -177,10 +254,10 @@ def test_task_id_from_protocol_v2(): "retry_policy": None, "headers": { "origin": "gen83744@hostname", - "root_id": "7e917b83-4018-431d-9832-73a28e1fb6c0", + "root_id": test_id, "expires": None, "shadow": None, - "id": "7e917b83-4018-431d-9832-73a28e1fb6c0", + "id": test_id, "kwargsrepr": "{'force_logout': True}", "lang": "py", "retries": 0, @@ -193,9 +270,18 @@ def test_task_id_from_protocol_v2(): }, "properties": { "reply_to": "c3054a07-5b28-3855-b18c-1623a24aaeca", - "correlation_id": "7e917b83-4018-431d-9832-73a28e1fb6c0", + "correlation_id": test_id, }, } task_id = retrieve_task_id(context) - assert task_id == "7e917b83-4018-431d-9832-73a28e1fb6c0" + assert task_id == test_id + + +def test_task_id_from_blank_context(): + # if there is no context (thus missing headers and body), + # no task_id is returned + context = {} + + task_id = retrieve_task_id(context) + assert task_id is None