-
Notifications
You must be signed in to change notification settings - Fork 16
Description
Describe the bug
VictoriaTraces Jaeger API Bug
Problem
Jaeger API /select/jaeger/api/traces/{traceID} returns incomplete traces. Only spans ending within ~30-60 seconds of trace start are returned.
Evidence
Production trace 4e4a1b65edb8143ce2fa90ee1df9601b:
- LogsQL: 10 spans
- Jaeger API: 3 spans
Test trace (see attached script):
- LogsQL: 5 spans
- Jaeger API: 3 spans
Reproduce
Run attached reproduce_victoria_traces_bug.py (takes 90 seconds):
pip install requests
python3 reproduce_victoria_traces_bug.py
Result: LogsQL returns all 5 spans, Jaeger API returns only 3.
Impact
Any trace over 1 minute appears incomplete in Grafana. Affects all long-running operations.
Environment
VictoriaTraces v0.4.0
Request
Please fix the Jaeger API to return all spans for a trace_id, not just those ending within the first 60 seconds.
To Reproduce
#!/usr/bin/env python3
"""
VictoriaTraces Jaeger API Bug - Standalone Reproduction
Bug: Jaeger API only returns spans ending within ~30 seconds of trace start.
Runtime: ~90 seconds
Dependencies: requests (pip install requests)
Usage:
python3 reproduce_victoria_traces_bug.py
Expected: LogsQL and Jaeger API both return all 5 spans
Actual: LogsQL returns 5 spans, Jaeger API returns only 2 spans
"""
import json
import os
import random
import time

import requests
# Configuration - set the environment variables below, or edit the defaults.
# VICTORIA_TRACES_URL: base URL of your VictoriaTraces endpoint. A trailing "/"
# is stripped because every request path appended to it already starts with "/".
VICTORIA_TRACES_URL = os.environ.get(
    "VICTORIA_TRACES_URL", "https://traces-dev.my-company.com"
).rstrip("/")
# VERIFY_SSL: set the env var to 1/true/yes if you have valid SSL certs.
# Defaults to False (certificate verification disabled), as before.
VERIFY_SSL = os.environ.get("VERIFY_SSL", "").strip().lower() in ("1", "true", "yes")
def generate_id(length=16):
    """Return a random string of ``length`` lowercase hexadecimal characters."""
    hex_digits = '0123456789abcdef'
    picked = []
    # One random.choice call per character, in order.
    for _ in range(length):
        picked.append(random.choice(hex_digits))
    return ''.join(picked)
def send_span_otlp(trace_id, span_id, parent_span_id, name, start_time_ns, duration_ns):
    """Post a single span to VictoriaTraces as an OTLP JSON payload.

    The span starts at ``start_time_ns`` and ends ``duration_ns`` nanoseconds
    later; both timestamps are sent as decimal strings. ``parentSpanId`` is
    included only when ``parent_span_id`` is truthy.

    Returns True if the POST completes without an HTTP error status. On any
    exception the error is printed and False is returned.
    """
    service_attr = {"key": "service.name", "value": {"stringValue": "bug-repro"}}
    finish_ns = start_time_ns + duration_ns

    otlp_span = {
        "traceId": trace_id,
        "spanId": span_id,
        "name": name,
        "startTimeUnixNano": str(start_time_ns),
        "endTimeUnixNano": str(finish_ns),
        "kind": 1,
        "attributes": [service_attr],
        "status": {"code": 0},
    }
    if parent_span_id:
        otlp_span["parentSpanId"] = parent_span_id

    # The same service.name attribute is set on the resource and on the span.
    resource_spans = {
        "resource": {"attributes": [service_attr]},
        "scopeSpans": [{"spans": [otlp_span]}],
    }
    body = {"resourceSpans": [resource_spans]}

    try:
        reply = requests.post(
            f"{VICTORIA_TRACES_URL}/insert/opentelemetry/v1/traces",
            json=body,
            verify=VERIFY_SSL,
            timeout=5,
        )
        reply.raise_for_status()
    except Exception as e:
        print(f" ERROR sending span {name}: {e}")
        return False
    else:
        return True
def _logsql_span_names(body):
    """Return the set of distinct span names found in a LogsQL response body.

    The body is parsed one line at a time as JSON. Lines that are not valid
    JSON, or that do not decode to an object with a truthy ``span_id``, are
    skipped. Entries without a ``name`` are recorded as ``'unknown'``.
    """
    names = set()
    for line in body.strip().split('\n'):
        try:
            entry = json.loads(line)
        except json.JSONDecodeError:
            # Blank or non-JSON lines carry no span data.
            continue
        if isinstance(entry, dict) and entry.get('span_id'):
            names.add(entry.get('name', 'unknown'))
    return names


def _jaeger_operation_names(payload):
    """Return the ``operationName`` of every span in the first trace of ``payload``.

    An absent, null or empty ``data`` list (and an absent or null ``spans``
    list) yields an empty result instead of raising.
    """
    traces = payload.get('data') or []
    if not traces:
        return []
    return [span['operationName'] for span in traces[0].get('spans') or []]


def main():
    """Send a 5-span test trace, query it back two ways, and compare the counts.

    Four short child spans are sent 20 seconds apart, followed by a parent
    span covering 90 seconds. The trace is then fetched through the LogsQL
    endpoint and through the Jaeger API, and the number of spans returned by
    each is printed together with a verdict.
    """
    child_count = 4
    interval_s = 20
    trace_duration_s = 90
    expected = child_count + 1  # children plus the parent span
    ns_per_s = 1_000_000_000
    banner = "=" * 80

    print(banner)
    print("VictoriaTraces Jaeger API Bug Reproduction")
    print(banner)
    print()

    # Generate trace and span IDs
    trace_id = generate_id(32)
    parent_id = generate_id(16)
    child_ids = [generate_id(16) for _ in range(child_count)]
    print(f"Trace ID: {trace_id}")
    print()
    print(f"Creating trace with {expected} spans over {trace_duration_s} seconds...")
    print(f" - 1 parent span (0s to {trace_duration_s}s)")
    print(f" - {child_count} child spans at {interval_s}s intervals")
    print()

    base_time = int(time.time() * ns_per_s)  # Current time in nanoseconds
    send_results = []

    # Send child spans at 20-second intervals
    for i, child_id in enumerate(child_ids):
        elapsed = i * interval_s
        print(f"[{elapsed:3d}s] Sending child_span_{i+1}...")
        send_results.append(send_span_otlp(
            trace_id, child_id, parent_id,
            f"child_span_{i+1}",
            base_time + elapsed * ns_per_s,
            100_000_000,  # 100ms duration
        ))
        if i < child_count - 1:  # Don't wait after last child
            time.sleep(interval_s)

    # Send parent span (covers entire trace). It is sent immediately after the
    # last child, so the label shows that wall-clock offset; the span's end
    # timestamp (base_time + 90s) is still in the future at this point.
    parent_sent_at = (child_count - 1) * interval_s
    print(f"[{parent_sent_at:3d}s] Sending parent_span (covers full {trace_duration_s}s)...")
    send_results.append(send_span_otlp(
        trace_id, parent_id, None,
        "parent_span",
        base_time,
        trace_duration_s * ns_per_s,
    ))
    print()
    # Only claim success if every send actually succeeded.
    if all(send_results):
        print(f"✓ All {expected} spans sent to VictoriaTraces")
    else:
        failed = send_results.count(False)
        print(f"⚠ {failed} of {expected} spans failed to send; results below will be incomplete")
    print()

    # Wait for ingestion
    print("Waiting 5 seconds for data ingestion...")
    time.sleep(5)
    print()

    # Query LogsQL
    print(banner)
    print("Querying VictoriaTraces...")
    print(banner)
    print()
    print("[1] LogsQL Query (storage backend):")
    try:
        response = requests.get(
            f"{VICTORIA_TRACES_URL}/select/logsql/query",
            params={"query": f'"trace_id":"{trace_id}"', "limit": 100},
            verify=VERIFY_SSL,
            timeout=10,
        )
        # Surface HTTP errors instead of parsing an error page as zero spans.
        response.raise_for_status()
        span_names = _logsql_span_names(response.text)
        print(f" Returned: {len(span_names)} spans")
        for name in sorted(span_names):
            print(f" - {name}")
    except Exception as e:
        # Top-level boundary of the script: report and carry on to the summary.
        print(f" ERROR: {e}")
        span_names = set()
    print()

    # Query Jaeger API
    print("[2] Jaeger API Query:")
    try:
        response = requests.get(
            f"{VICTORIA_TRACES_URL}/select/jaeger/api/traces/{trace_id}",
            verify=VERIFY_SSL,
            timeout=10,
        )
        response.raise_for_status()
        jaeger_names = _jaeger_operation_names(response.json())
        print(f" Returned: {len(jaeger_names)} spans")
        for name in sorted(jaeger_names):
            print(f" - {name}")
    except Exception as e:
        print(f" ERROR: {e}")
        jaeger_names = []
    print()

    # Results
    print(banner)
    print("RESULTS")
    print(banner)
    print()
    logsql_count = len(span_names)
    jaeger_count = len(jaeger_names)
    print(f"Expected spans: {expected}")
    print(f"LogsQL returned: {logsql_count} spans")
    print(f"Jaeger API returned: {jaeger_count} spans")
    print()
    if logsql_count == jaeger_count == expected:
        print("✓ SUCCESS: Both APIs returned all spans - no bug detected")
        print()
        print("This could mean:")
        print(" - The bug has been fixed")
        print(" - Test duration too short (try longer trace)")
        print(" - Different VictoriaTraces version")
    elif logsql_count == expected and jaeger_count < expected:
        print(f"✗ BUG CONFIRMED: Jaeger API missing {expected - jaeger_count} spans!")
        print()
        missing = set(span_names) - set(jaeger_names)
        print("Missing from Jaeger API:")
        for name in sorted(missing):
            print(f" - {name}")
        print()
        print("DIAGNOSIS:")
        print(" VictoriaTraces Jaeger API has ~30 second time window")
        print(" Spans ending after ~30s from trace start are filtered out")
        print()
        print("Expected: child_span_3, child_span_4, parent_span are missing")
        # child_span_3 starts at 40s and child_span_4 at 60s, each lasting 100ms.
        print(" (They end at ~40s, ~60s, and 90s respectively)")
    else:
        print(f"⚠ UNEXPECTED: LogsQL={logsql_count}, Jaeger={jaeger_count}")
        print(" Data may not have been ingested properly")
    print()
    print("View in VictoriaTraces UI:")
    print(f" {VICTORIA_TRACES_URL}/select/vmui/?#/?query=%22trace_id%22%3A%22{trace_id}%22")
    print()
if __name__ == "__main__":
    # Silence urllib3's InsecureRequestWarning before running the reproduction.
    from urllib3 import disable_warnings
    from urllib3.exceptions import InsecureRequestWarning

    disable_warnings(InsecureRequestWarning)
    main()
Version
Version victoria-traces-20251014-032256-tags-v0.4.0-0-g8a5f1b618
Logs
The Python script above reproduces the issue on the version listed above.
I do not have access to the logs on our server.
Screenshots
No response
Used command-line flags
No response
Additional information
No response