-
Notifications
You must be signed in to change notification settings - Fork 196
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Improve support bundle data for running Pipelines, update support bundle component names and file paths, and update log warning for a particular type of error during step completion #916
Changes from all commits
936f2bc
9b277cf
3775801
a3db23e
698be19
61f14f5
60a014c
e8af1ac
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -435,7 +435,9 @@ | |
} | ||
}); | ||
} catch (IOException x) { | ||
LOGGER.log(Level.FINE, null, x); | ||
// TODO: If the problem is with the FlowNode and not the CpsFlowExecution, should we try to call | ||
// CpsVmExecutorService.reportProblem or CpsFlowExecution.croak to kill the build right away? | ||
LOGGER.log(Level.WARNING, "Unable to load FlowNode or CpsFlowExecution when completing " + this + ", which is likely to cause its execution to hang indefinitely", x); | ||
Comment on lines
+438
to
+440
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We do not have any tests in this plugin which enter this |
||
} | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,19 +5,29 @@ | |
import com.cloudbees.jenkins.support.api.Content; | ||
import com.google.common.util.concurrent.FutureCallback; | ||
import hudson.Extension; | ||
import hudson.Functions; | ||
import hudson.model.Action; | ||
import hudson.model.Queue; | ||
import hudson.model.Run; | ||
import hudson.security.Permission; | ||
import java.io.IOException; | ||
import java.io.OutputStream; | ||
import java.io.OutputStreamWriter; | ||
import java.io.PrintWriter; | ||
import java.util.Collections; | ||
import java.util.Set; | ||
import java.util.concurrent.CompletableFuture; | ||
import java.util.concurrent.ExecutionException; | ||
import java.util.concurrent.TimeUnit; | ||
import jenkins.model.Jenkins; | ||
import java.nio.charset.StandardCharsets; | ||
import java.time.Duration; | ||
import java.time.Instant; | ||
import java.util.HashSet; | ||
import java.util.Map; | ||
import java.util.Optional; | ||
import java.util.TreeMap; | ||
import java.util.concurrent.atomic.LongAdder; | ||
import java.util.stream.Collectors; | ||
import org.jenkinsci.plugins.workflow.flow.FlowExecution; | ||
import org.jenkinsci.plugins.workflow.flow.FlowExecutionList; | ||
import org.kohsuke.stapler.HttpResponses; | ||
|
@@ -99,22 +109,56 @@ public CpsThreadDump getThreadDump() { | |
} | ||
|
||
@Override public String getDisplayName() { | ||
return "Thread dumps of running Pipeline builds"; | ||
return "Running Pipeline builds"; | ||
} | ||
|
||
/** Reports the category of this component as {@link ComponentCategory#BUILDS}. */
@Override public ComponentCategory getCategory() { | ||
return ComponentCategory.BUILDS; | ||
} | ||
|
||
@Override public void addContents(Container container) { | ||
container.add(new Content("nodes/master/pipeline-thread-dump.txt") { | ||
@Override public void writeTo(OutputStream outputStream) throws IOException { | ||
container.add(new Content("nodes/master/pipeline-running-builds.txt") { | ||
@Override public void writeTo(OutputStream outputStream) { | ||
PrintWriter pw = new PrintWriter(new OutputStreamWriter(outputStream, StandardCharsets.UTF_8)); | ||
for (FlowExecution flow : FlowExecutionList.get()) { | ||
if (flow instanceof CpsFlowExecution) { | ||
pw.println("Build: " + flow.getOwner().getExecutable()); | ||
((CpsFlowExecution) flow).getThreadDump().print(pw); | ||
pw.println("Approximate graph size: " + ((CpsFlowExecution) flow).approximateNodeCount()); | ||
Queue.Executable ownerExec; | ||
try { | ||
ownerExec = flow.getOwner().getExecutable(); | ||
} catch (IOException e) { | ||
pw.println("No data available for " + flow); | ||
Functions.printStackTrace(e, pw); | ||
pw.println(); | ||
continue; | ||
} | ||
pw.println("Build: " + ownerExec); | ||
if (ownerExec instanceof Run<?, ?>) { | ||
var run = (Run<?, ?>) ownerExec; | ||
var started = Instant.ofEpochMilli(run.getStartTimeInMillis()); | ||
pw.println("Started: " + started); | ||
var duration = Duration.between(started, Instant.now()); | ||
pw.print("Duration: " + duration); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Not very legible; consider There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, this is subjective (personally I like ISO 8601 in contexts like this). Mainly I did not use |
||
if (duration.toDays() > 3) { | ||
pw.println(" (Running for more than 3 days!)"); | ||
} else { | ||
pw.println(); | ||
} | ||
} | ||
var cpsFlow = (CpsFlowExecution) flow; | ||
Map<String, LongAdder> sortedTimings = new TreeMap<>(cpsFlow.liveTimings); | ||
pw.println("Timings:"); | ||
sortedTimings.forEach((k, v) -> pw.println(" " + k + "\t" + v.longValue() / 1000 / 1000 + "ms")); | ||
pw.println("Active operations:"); | ||
long nanos = System.nanoTime(); | ||
Map<String, Optional<CountAndDuration>> sortedIncompleteTimings = new HashSet<>(cpsFlow.liveIncompleteTimings).stream() | ||
.collect(Collectors.groupingBy(t -> t.getKind().name(), TreeMap::new, | ||
Collectors.mapping(t -> new CountAndDuration(nanos - t.getStartNanos()), | ||
Collectors.reducing(CountAndDuration::new)))); | ||
sortedIncompleteTimings.forEach((k, optional) -> | ||
optional.ifPresent(cd -> | ||
pw.println(" " + k + "\t" + cd.count + "\t" + cd.duration / 1000 / 1000 + "ms"))); | ||
pw.println("Approximate graph size: " + cpsFlow.approximateNodeCount()); | ||
cpsFlow.getThreadDump().print(pw); | ||
pw.println(); | ||
} | ||
} | ||
|
@@ -123,6 +167,19 @@ public CpsThreadDump getThreadDump() { | |
}); | ||
} | ||
|
||
private static class CountAndDuration { | ||
private final int count; | ||
private final long duration; | ||
CountAndDuration(long duration) { | ||
this.count = 1; | ||
this.duration = duration; | ||
} | ||
CountAndDuration(CountAndDuration a, CountAndDuration b) { | ||
this.count = a.count + b.count; | ||
this.duration = a.duration + b.duration; | ||
} | ||
} | ||
|
||
} | ||
|
||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I considered a few different approaches, like using metrics or refactoring `liveTimings`, but in the end this seemed like the best way to report useful information about ongoing operations while keeping things simple and similar to what we already do. I don't see any benefit to persisting this data since it only applies to actively running tasks - once the build completes it should be empty, barring any cleanup-related bugs.