diff --git a/.gitignore b/.gitignore index 68da44a8345..4dbfb0094f9 100644 --- a/.gitignore +++ b/.gitignore @@ -66,6 +66,9 @@ replay_pid* !dd-java-agent/benchmark/releases/*.jar **/errors/*.log +# Fuzz testing logs # +fuzz-logs/ + # Magic for local JMC built /vendor/jmc-libs diff --git a/dd-java-agent/agent-debugger/build.gradle b/dd-java-agent/agent-debugger/build.gradle index 0ef3992644f..4de4d80151c 100644 --- a/dd-java-agent/agent-debugger/build.gradle +++ b/dd-java-agent/agent-debugger/build.gradle @@ -44,6 +44,9 @@ dependencies { implementation libs.dogstatsd implementation libs.moshi + // Antithesis SDK for assertions and property testing - bundled in tracer JAR + implementation group: 'com.antithesis', name: 'sdk', version: '1.4.5' + testImplementation libs.asm.util testImplementation libs.bundles.junit5 testImplementation libs.junit.jupiter.params diff --git a/dd-java-agent/agent-profiling/build.gradle b/dd-java-agent/agent-profiling/build.gradle index a53ac40d8fe..f46d991de55 100644 --- a/dd-java-agent/agent-profiling/build.gradle +++ b/dd-java-agent/agent-profiling/build.gradle @@ -20,6 +20,9 @@ dependencies { api libs.slf4j api project(':internal-api') + // Antithesis SDK for assertions and property testing - bundled in tracer JAR + implementation group: 'com.antithesis', name: 'sdk', version: '1.4.5' + api project(':dd-java-agent:agent-profiling:profiling-ddprof') api project(':dd-java-agent:agent-profiling:profiling-uploader') api project(':dd-java-agent:agent-profiling:profiling-controller') diff --git a/dd-java-agent/agent-profiling/profiling-controller-openjdk/build.gradle b/dd-java-agent/agent-profiling/profiling-controller-openjdk/build.gradle index 61c9e9a57bf..1d797db2069 100644 --- a/dd-java-agent/agent-profiling/profiling-controller-openjdk/build.gradle +++ b/dd-java-agent/agent-profiling/profiling-controller-openjdk/build.gradle @@ -26,6 +26,9 @@ dependencies { api project(':dd-java-agent:agent-profiling:profiling-controller') api project(':dd-java-agent:agent-profiling:profiling-controller-jfr') + // Antithesis SDK for assertions and property testing - bundled in tracer JAR + implementation group: 'com.antithesis', name: 'sdk', version: '1.4.5' + testImplementation libs.bundles.junit5 testImplementation libs.bundles.mockito testImplementation files(project(':dd-java-agent:agent-profiling:profiling-controller-jfr').sourceSets.test.output) diff --git a/dd-java-agent/agent-profiling/profiling-controller/build.gradle b/dd-java-agent/agent-profiling/profiling-controller/build.gradle index e255fdf668d..c5820188739 100644 --- a/dd-java-agent/agent-profiling/profiling-controller/build.gradle +++ b/dd-java-agent/agent-profiling/profiling-controller/build.gradle @@ -22,6 +22,9 @@ dependencies { api project(':components:environment') api project(':dd-java-agent:agent-profiling:profiling-utils') + // Antithesis SDK for assertions and property testing - bundled in tracer JAR + implementation group: 'com.antithesis', name: 'sdk', version: '1.4.5' + testImplementation libs.bundles.junit5 testImplementation libs.guava testImplementation libs.bundles.mockito diff --git a/dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/ProfilingSystem.java b/dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/ProfilingSystem.java index 7f57b356d99..3d306d26c40 100644 --- a/dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/ProfilingSystem.java +++ b/dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/ProfilingSystem.java @@ -198,7 +198,6 @@ private void startProfilingRecording() { ProfilerFlareLogger.getInstance().log("Shutdown in progress, cannot start profiling"); } else { ProfilerFlareLogger.getInstance().log("Failed to start profiling", t); - throw t instanceof RuntimeException ? (RuntimeException) t : new RuntimeException(t); } } diff --git a/dd-java-agent/agent-profiling/profiling-ddprof/build.gradle b/dd-java-agent/agent-profiling/profiling-ddprof/build.gradle index 2664a8945ac..8350eda97f4 100644 --- a/dd-java-agent/agent-profiling/profiling-ddprof/build.gradle +++ b/dd-java-agent/agent-profiling/profiling-ddprof/build.gradle @@ -36,6 +36,9 @@ dependencies { implementation libs.slf4j + // Antithesis SDK for assertions and property testing - bundled in tracer JAR + implementation group: 'com.antithesis', name: 'sdk', version: '1.4.5' + testImplementation libs.bundles.jmc testImplementation libs.bundles.junit5 } diff --git a/dd-java-agent/agent-profiling/profiling-ddprof/src/main/java/com/datadog/profiling/ddprof/DatadogProfiler.java b/dd-java-agent/agent-profiling/profiling-ddprof/src/main/java/com/datadog/profiling/ddprof/DatadogProfiler.java index 0c889108d2d..098b1b54bf3 100644 --- a/dd-java-agent/agent-profiling/profiling-ddprof/src/main/java/com/datadog/profiling/ddprof/DatadogProfiler.java +++ b/dd-java-agent/agent-profiling/profiling-ddprof/src/main/java/com/datadog/profiling/ddprof/DatadogProfiler.java @@ -56,7 +56,6 @@ import javax.annotation.Nullable; import org.slf4j.Logger; import org.slf4j.LoggerFactory; - /** * It is currently assumed that this class can be initialised early so that Datadog profiler's * thread filter captures all tracing activity, which means it must not be modified to depend on diff --git a/dd-java-agent/agent-profiling/profiling-uploader/build.gradle b/dd-java-agent/agent-profiling/profiling-uploader/build.gradle index f9a03e3a917..3672a3d5d1c 100644 --- a/dd-java-agent/agent-profiling/profiling-uploader/build.gradle +++ b/dd-java-agent/agent-profiling/profiling-uploader/build.gradle @@ -33,6 +33,9 @@ dependencies { implementation libs.lz4 implementation libs.aircompressor + // Antithesis SDK for assertions and property testing - bundled in tracer JAR + implementation group: 'com.antithesis', name: 'sdk', version: '1.4.5' + testImplementation project(':dd-java-agent:agent-profiling:profiling-testing') testImplementation project(':utils:test-utils') testImplementation libs.bundles.junit5 diff --git a/dd-trace-core/build.gradle b/dd-trace-core/build.gradle index b2d17cc8ef1..e8435598169 100644 --- a/dd-trace-core/build.gradle +++ b/dd-trace-core/build.gradle @@ -80,6 +80,11 @@ dependencies { implementation group: 'com.google.re2j', name: 're2j', version: '1.7' + // Antithesis SDK for assertions and property testing - bundled in tracer JAR + implementation group: 'com.antithesis', name: 'sdk', version: '1.4.5' + + compileOnly group: 'com.github.spotbugs', name: 'spotbugs-annotations', version: '4.2.0' + // We have autoservices defined in test subtree, looks like we need this to be able to properly rebuild this testAnnotationProcessor libs.autoservice.processor testCompileOnly libs.autoservice.annotation diff --git a/dd-trace-core/src/main/java/datadog/trace/common/writer/PayloadDispatcherImpl.java b/dd-trace-core/src/main/java/datadog/trace/common/writer/PayloadDispatcherImpl.java index a0011216770..a9c8710b0f5 100644 --- a/dd-trace-core/src/main/java/datadog/trace/common/writer/PayloadDispatcherImpl.java +++ b/dd-trace-core/src/main/java/datadog/trace/common/writer/PayloadDispatcherImpl.java @@ -1,5 +1,8 @@ package datadog.trace.common.writer; +import com.antithesis.sdk.Assert; +import com.fasterxml.jackson.databind.node.JsonNodeFactory; +import com.fasterxml.jackson.databind.node.ObjectNode; import datadog.communication.monitor.Monitoring; import datadog.communication.monitor.Recording; import datadog.communication.serialization.ByteBufferConsumer; @@ -107,14 +110,39 @@ public void accept(int messageCount, ByteBuffer buffer) { Payload payload = newPayload(messageCount, buffer); final int sizeInBytes = payload.sizeInBytes(); healthMetrics.onSerialize(sizeInBytes); + + // Antithesis: Track all send attempts + ObjectNode sendAttemptDetails = JsonNodeFactory.instance.objectNode(); + sendAttemptDetails.put("trace_count", messageCount); + sendAttemptDetails.put("payload_size_bytes", sizeInBytes); + sendAttemptDetails.put("dropped_traces_in_payload", payload.droppedTraces()); + sendAttemptDetails.put("dropped_spans_in_payload", payload.droppedSpans()); + Assert.sometimes(true, "trace_payloads_being_sent", sendAttemptDetails); + RemoteApi.Response response = api.sendSerializedTraces(payload); mapper.reset(); + if (response.success()) { + // Antithesis: Track successful sends + ObjectNode successDetails = JsonNodeFactory.instance.objectNode(); + successDetails.put("decision", "sent_success"); + successDetails.put("trace_count", messageCount); + successDetails.put("payload_size_bytes", sizeInBytes); + successDetails.put("http_status", response.status().orElse(-1)); + Assert.sometimes(true, "traces_sent_successfully", successDetails); if (log.isDebugEnabled()) { log.debug("Successfully sent {} traces to the API", messageCount); } healthMetrics.onSend(messageCount, sizeInBytes, response); } else { + // Antithesis: Track failed sends + ObjectNode failedDetails = JsonNodeFactory.instance.objectNode(); + failedDetails.put("decision", "dropped_send_failed"); + failedDetails.put("trace_count", messageCount); + failedDetails.put("payload_size_bytes", sizeInBytes); + failedDetails.put("http_status", response.status().orElse(-1)); + failedDetails.put("has_exception", response.exception() != null); + Assert.sometimes(true, "traces_failed_to_send", failedDetails); if (log.isDebugEnabled()) { log.debug( "Failed to send {} traces of size {} bytes to the API", messageCount, sizeInBytes); diff --git a/dd-trace-core/src/main/java/datadog/trace/common/writer/RemoteWriter.java b/dd-trace-core/src/main/java/datadog/trace/common/writer/RemoteWriter.java index 90008cad0a0..6c21f320468 100644 --- a/dd-trace-core/src/main/java/datadog/trace/common/writer/RemoteWriter.java +++ b/dd-trace-core/src/main/java/datadog/trace/common/writer/RemoteWriter.java @@ -3,6 +3,9 @@ import static datadog.trace.api.sampling.PrioritySampling.UNSET; import static java.util.concurrent.TimeUnit.MINUTES; +import com.antithesis.sdk.Assert; +import com.fasterxml.jackson.databind.node.JsonNodeFactory; +import com.fasterxml.jackson.databind.node.ObjectNode; import datadog.trace.core.DDSpan; import datadog.trace.core.monitor.HealthMetrics; import datadog.trace.relocate.api.RatelimitedLogger; @@ -68,6 +71,11 @@ protected RemoteWriter( @Override public void write(final List trace) { if (closed) { + // Antithesis: Track traces dropped during shutdown + ObjectNode shutdownDetails = JsonNodeFactory.instance.objectNode(); + shutdownDetails.put("decision", "dropped_shutdown"); + shutdownDetails.put("span_count", trace.size()); + Assert.sometimes(true, "trace_dropped_writer_closed", shutdownDetails); // We can't add events after shutdown otherwise it will never complete shutting down. log.debug("Dropped due to shutdown: {}", trace); handleDroppedTrace(trace); @@ -80,6 +88,13 @@ public void write(final List trace) { final int samplingPriority = root.samplingPriority(); switch (traceProcessingWorker.publish(root, samplingPriority, trace)) { case ENQUEUED_FOR_SERIALIZATION: + // Antithesis: Track traces enqueued for sending + ObjectNode enqueuedDetails = JsonNodeFactory.instance.objectNode(); + enqueuedDetails.put("decision", "enqueued"); + enqueuedDetails.put("trace_id", root.getTraceId().toString()); + enqueuedDetails.put("span_count", trace.size()); + enqueuedDetails.put("sampling_priority", samplingPriority); + Assert.sometimes(true, "trace_enqueued_for_send", enqueuedDetails); log.debug("Enqueued for serialization: {}", trace); healthMetrics.onPublish(trace, samplingPriority); break; @@ -87,10 +102,24 @@ public void write(final List trace) { log.debug("Enqueued for single span sampling: {}", trace); break; case DROPPED_BY_POLICY: + // Antithesis: Track traces dropped by policy + ObjectNode policyDetails = JsonNodeFactory.instance.objectNode(); + policyDetails.put("decision", "dropped_policy"); + policyDetails.put("trace_id", root.getTraceId().toString()); + policyDetails.put("span_count", trace.size()); + policyDetails.put("sampling_priority", samplingPriority); + Assert.sometimes(true, "trace_dropped_by_policy", policyDetails); log.debug("Dropped by the policy: {}", trace); handleDroppedTrace(trace); break; case DROPPED_BUFFER_OVERFLOW: + // Antithesis: Track traces dropped due to buffer overflow + ObjectNode overflowDetails = JsonNodeFactory.instance.objectNode(); + overflowDetails.put("decision", "dropped_buffer_overflow"); + overflowDetails.put("trace_id", root.getTraceId().toString()); + overflowDetails.put("span_count", trace.size()); + overflowDetails.put("sampling_priority", samplingPriority); + Assert.sometimes(true, "trace_dropped_buffer_overflow", overflowDetails); if (log.isDebugEnabled()) { log.debug("Dropped due to a buffer overflow: {}", trace); } else { diff --git a/dd-trace-core/src/main/java/datadog/trace/core/CoreTracer.java b/dd-trace-core/src/main/java/datadog/trace/core/CoreTracer.java index 28db0b6c99e..21c5f029e74 100644 --- a/dd-trace-core/src/main/java/datadog/trace/core/CoreTracer.java +++ b/dd-trace-core/src/main/java/datadog/trace/core/CoreTracer.java @@ -19,6 +19,10 @@ import static java.util.concurrent.TimeUnit.NANOSECONDS; import static java.util.concurrent.TimeUnit.SECONDS; +import com.antithesis.sdk.Assert; +import com.fasterxml.jackson.databind.node.JsonNodeFactory; +import com.fasterxml.jackson.databind.node.ObjectNode; + import datadog.communication.ddagent.DDAgentFeaturesDiscovery; import datadog.communication.ddagent.ExternalAgentLauncher; import datadog.communication.ddagent.SharedCommunicationObjects; @@ -1246,8 +1250,22 @@ void write(final List trace) { spanToSample.forceKeep(forceKeep); boolean published = forceKeep || traceCollector.sample(spanToSample); if (published) { + // Antithesis: Track traces accepted by sampling + ObjectNode acceptedDetails = JsonNodeFactory.instance.objectNode(); + acceptedDetails.put("decision", "accepted"); + acceptedDetails.put("trace_id", writtenTrace.get(0).getTraceId().toString()); + acceptedDetails.put("span_count", writtenTrace.size()); + acceptedDetails.put("sampling_priority", spanToSample.samplingPriority()); + Assert.sometimes(true, "trace_accepted_by_sampling", acceptedDetails); writer.write(writtenTrace); } else { + // Antithesis: Track traces dropped by sampling + ObjectNode droppedDetails = JsonNodeFactory.instance.objectNode(); + droppedDetails.put("decision", "dropped_sampling"); + droppedDetails.put("trace_id", writtenTrace.get(0).getTraceId().toString()); + droppedDetails.put("span_count", writtenTrace.size()); + droppedDetails.put("sampling_priority", spanToSample.samplingPriority()); + Assert.sometimes(true, "trace_dropped_by_sampling", droppedDetails); // with span streaming this won't work - it needs to be changed // to track an effective sampling rate instead, however, tests // checking that a hard reference on a continuation prevents diff --git a/remote-config/remote-config-core/build.gradle.kts b/remote-config/remote-config-core/build.gradle.kts index f3d0200b797..3c90599ee00 100644 --- a/remote-config/remote-config-core/build.gradle.kts +++ b/remote-config/remote-config-core/build.gradle.kts @@ -37,6 +37,9 @@ dependencies { implementation(libs.moshi) implementation(libs.bundles.cafe.crypto) + // Antithesis SDK for assertions and property testing - bundled in tracer JAR + implementation(group = "com.antithesis", name = "sdk", version = "1.4.5") + implementation(project(":internal-api")) testImplementation(project(":utils:test-utils")) diff --git a/remote-config/remote-config-core/src/main/java/datadog/remoteconfig/DefaultConfigurationPoller.java b/remote-config/remote-config-core/src/main/java/datadog/remoteconfig/DefaultConfigurationPoller.java index 43863d1699b..205dec18af3 100644 --- a/remote-config/remote-config-core/src/main/java/datadog/remoteconfig/DefaultConfigurationPoller.java +++ b/remote-config/remote-config-core/src/main/java/datadog/remoteconfig/DefaultConfigurationPoller.java @@ -281,8 +281,8 @@ private boolean initialize() { new PollerRequestFactory(config, tracerVersion, containerId, entityId, url, moshi); } catch (Exception e) { // We can't recover from this, so we'll not try to initialize again. - fatalOnInitialization = true; log.error("Remote configuration poller initialization failed", e); + fatalOnInitialization = true; } return true; } diff --git a/telemetry/build.gradle.kts b/telemetry/build.gradle.kts index 1b66facc063..77c95d3a48d 100644 --- a/telemetry/build.gradle.kts +++ b/telemetry/build.gradle.kts @@ -34,6 +34,9 @@ dependencies { implementation(libs.slf4j) implementation(project(":internal-api")) + + // Antithesis SDK for assertions and property testing - bundled in tracer JAR + implementation(group = "com.antithesis", name = "sdk", version = "1.4.5") compileOnly(project(":dd-java-agent:agent-tooling")) testImplementation(project(":dd-java-agent:agent-tooling")) diff --git a/telemetry/src/main/java/datadog/telemetry/TelemetryClient.java b/telemetry/src/main/java/datadog/telemetry/TelemetryClient.java index c13411e0e69..c0f7f26bfe0 100644 --- a/telemetry/src/main/java/datadog/telemetry/TelemetryClient.java +++ b/telemetry/src/main/java/datadog/telemetry/TelemetryClient.java @@ -98,9 +98,11 @@ public Result sendHttpRequest(Request.Builder httpRequestBuilder) { try (okhttp3.Response response = OkHttpUtils.sendWithRetries(okHttpClient, httpRetryPolicy, httpRequest)) { if (response.code() == 404) { + log.debug("Telemetry endpoint is disabled, dropping {} message.", requestType); return Result.NOT_FOUND; } + if (!response.isSuccessful()) { log.debug( "Telemetry message {} failed with: {} {}.", @@ -109,6 +111,7 @@ public Result sendHttpRequest(Request.Builder httpRequestBuilder) { response.message()); return Result.FAILURE; } + } catch (InterruptedIOException e) { log.debug("Telemetry message {} sending interrupted: {}.", requestType, e.toString()); return Result.INTERRUPTED; diff --git a/telemetry/src/main/java/datadog/telemetry/TelemetryRouter.java b/telemetry/src/main/java/datadog/telemetry/TelemetryRouter.java index 1636f865def..801d6743ae7 100644 --- a/telemetry/src/main/java/datadog/telemetry/TelemetryRouter.java +++ b/telemetry/src/main/java/datadog/telemetry/TelemetryRouter.java @@ -41,6 +41,7 @@ public TelemetryClient.Result sendRequest(TelemetryRequest request) { // interrupted request is most likely due to telemetry system shutdown, // we do not want to log errors and reattempt in this case && result != TelemetryClient.Result.INTERRUPTED; + if (currentClient == agentClient) { if (requestFailed) { reportErrorOnce(currentClient.getUrl(), result);