-
Notifications
You must be signed in to change notification settings - Fork 287
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Delayed processing for ProcessManager.pidToProcessInfo #321
Changes from 4 commits
cbdc6f9
5317c8f
a8c3852
84c6ca8
5cd4b3a
b1e2622
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -32,6 +32,7 @@ import ( | |||||||||||||||||||||||||
"go.opentelemetry.io/ebpf-profiler/reporter" | ||||||||||||||||||||||||||
"go.opentelemetry.io/ebpf-profiler/times" | ||||||||||||||||||||||||||
"go.opentelemetry.io/ebpf-profiler/tpbase" | ||||||||||||||||||||||||||
"go.opentelemetry.io/ebpf-profiler/tracehandler" | ||||||||||||||||||||||||||
"go.opentelemetry.io/ebpf-profiler/util" | ||||||||||||||||||||||||||
) | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
|
@@ -506,32 +507,36 @@ func (pm *ProcessManager) synchronizeMappings(pr process.Process, | |||||||||||||||||||||||||
return newProcess | ||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
// ProcessPIDExit informs the ProcessManager that a process exited and no longer will be scheduled | ||||||||||||||||||||||||||
// for processing. It also schedules immediate symbolization if the exited PID needs it. exitKTime | ||||||||||||||||||||||||||
// is stored for later processing in SymbolizationComplete when all traces have been collected. | ||||||||||||||||||||||||||
// There can be a race condition if we can not clean up the references for this process | ||||||||||||||||||||||||||
// ProcessPIDExit informs the ProcessManager that a process exited and no longer will be scheduled. | ||||||||||||||||||||||||||
// exitKTime is stored for later processing in ProcessedUntil, when traces up to this time have been | ||||||||||||||||||||||||||
// processed. There can be a race condition if we can not clean up the references for this process | ||||||||||||||||||||||||||
// fast enough and this particular pid is reused again by the system. | ||||||||||||||||||||||||||
// NOTE: Exported only for tracer. | ||||||||||||||||||||||||||
func (pm *ProcessManager) ProcessPIDExit(pid libpf.PID) bool { | ||||||||||||||||||||||||||
func (pm *ProcessManager) ProcessPIDExit(pid libpf.PID) { | ||||||||||||||||||||||||||
exitKTime := times.GetKTime() | ||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Moved this outside the lock for improved accuracy (there's a debug log in |
||||||||||||||||||||||||||
log.Debugf("- PID: %v", pid) | ||||||||||||||||||||||||||
defer pm.ebpf.RemoveReportedPID(pid) | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
pm.mu.Lock() | ||||||||||||||||||||||||||
defer pm.mu.Unlock() | ||||||||||||||||||||||||||
rockdaboot marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
symbolize := false | ||||||||||||||||||||||||||
exitKTime := times.GetKTime() | ||||||||||||||||||||||||||
if pm.interpreterTracerEnabled { | ||||||||||||||||||||||||||
if len(pm.interpreters[pid]) > 0 { | ||||||||||||||||||||||||||
pidExited := false | ||||||||||||||||||||||||||
info, pidExists := pm.pidToProcessInfo[pid] | ||||||||||||||||||||||||||
if pidExists || (pm.interpreterTracerEnabled && | ||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Essentially same logic as before with these additions:
|
||||||||||||||||||||||||||
len(pm.interpreters[pid]) > 0) { | ||||||||||||||||||||||||||
// ProcessPIDExit may be called multiple times in short succession | ||||||||||||||||||||||||||
// for the same PID, don't update exitKTime if we've previously recorded it. | ||||||||||||||||||||||||||
if _, pidExited = pm.exitEvents[pid]; !pidExited { | ||||||||||||||||||||||||||
pm.exitEvents[pid] = exitKTime | ||||||||||||||||||||||||||
symbolize = true | ||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
info, ok := pm.pidToProcessInfo[pid] | ||||||||||||||||||||||||||
if !ok { | ||||||||||||||||||||||||||
if !pidExists { | ||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. To keep the global read & write lock as short as possible, the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That would prevent executing There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As Tim wrote, this would alter the logic. I tried to keep as much of the original semantics the same to avoid introducing new races. Maybe here it's possible to safely say that There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. follow up is done in #325 |
||||||||||||||||||||||||||
log.Debugf("Skip process exit handling for unknown PID %d", pid) | ||||||||||||||||||||||||||
return symbolize | ||||||||||||||||||||||||||
return | ||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||
if pidExited { | ||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We don't want to attempt a repeat cleanup for the same PID, if we've previously performed it. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think, There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Renamed |
||||||||||||||||||||||||||
log.Debugf("Skip duplicate process exit handling for PID %d", pid) | ||||||||||||||||||||||||||
return | ||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
// Delete all entries we have for this particular PID from pid_page_to_mapping_info. | ||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I kept this cleanup here as there's no immediate need to postpone cleaning up the eBPF map until |
||||||||||||||||||||||||||
|
@@ -548,9 +553,6 @@ func (pm *ProcessManager) ProcessPIDExit(pid libpf.PID) bool { | |||||||||||||||||||||||||
address, pid, err) | ||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||
delete(pm.pidToProcessInfo, pid) | ||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is now taking place in |
||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
return symbolize | ||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
func (pm *ProcessManager) SynchronizeProcess(pr process.Process) { | ||||||||||||||||||||||||||
|
@@ -670,3 +672,34 @@ func (pm *ProcessManager) ExePathForPID(pid libpf.PID) string { | |||||||||||||||||||||||||
} | ||||||||||||||||||||||||||
return executable | ||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
func (pm *ProcessManager) ProcessedUntil(traceCaptureKTime times.KTime) { | ||||||||||||||||||||||||||
pm.mu.Lock() | ||||||||||||||||||||||||||
defer pm.mu.Unlock() | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
nowKTime := times.GetKTime() | ||||||||||||||||||||||||||
log.Debugf("ProcessedUntil captureKT: %v latency: %v ms", | ||||||||||||||||||||||||||
traceCaptureKTime, (nowKTime-traceCaptureKTime)/1e6) | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
Comment on lines +704 to +710
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. keep the lock holding as short as possible:
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This can affect the latency measurement, since we're timing before the lock. |
||||||||||||||||||||||||||
for pid, pidExitKTime := range pm.exitEvents { | ||||||||||||||||||||||||||
if pidExitKTime > traceCaptureKTime { | ||||||||||||||||||||||||||
continue | ||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
delete(pm.pidToProcessInfo, pid) | ||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same logic as before with this single-line addition. |
||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
for _, instance := range pm.interpreters[pid] { | ||||||||||||||||||||||||||
if err := instance.Detach(pm.ebpf, pid); err != nil { | ||||||||||||||||||||||||||
log.Errorf("Failed to handle interpreted process exit for PID %d: %v", | ||||||||||||||||||||||||||
rockdaboot marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||||||||||||||||
pid, err) | ||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||
delete(pm.interpreters, pid) | ||||||||||||||||||||||||||
delete(pm.exitEvents, pid) | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
log.Debugf("PID %v exit latency %v ms", pid, (nowKTime-pidExitKTime)/1e6) | ||||||||||||||||||||||||||
rockdaboot marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
// Compile time check to make sure we satisfy the interface. | ||||||||||||||||||||||||||
var _ tracehandler.TraceProcessor = (*ProcessManager)(nil) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Moved to `processinfo.go` for consistency (all `pidToProcessInfo` accessors in one place), renamed to `ProcessedUntil` and updated to also clean up `pidToProcessInfo`.