Skip to content

Commit ead0a28

Browse files
committed
cmd/coordinator: clean up HTML status, add scheduler status
The HTML status for the coordinator was way too long. For pending builds, only show a single line, and render their state as "waiting_for_machine" rather than "running". And for active builds, only show the last few lines of status on the home page. People can click for details. Then add a scheduler status section too. I'm also stashing away a build's SchedItem for now (with a little refactoring to break up a long method), so a future CL can tell people where a build is in line to get a buildlet. Updates golang/go#19178 Change-Id: I2f37982ea3c7ee4a6581464117ae533499eba6a4 Reviewed-on: https://go-review.googlesource.com/c/build/+/207179 Reviewed-by: Bryan C. Mills <bcmills@google.com>
1 parent 9ab9ee3 commit ead0a28

File tree

4 files changed

+245
-38
lines changed

4 files changed

+245
-38
lines changed

cmd/coordinator/coordinator.go

+77-32
Original file line numberDiff line numberDiff line change
@@ -715,7 +715,7 @@ func serveTryStatusHTML(w http.ResponseWriter, ts *trySet, tss trySetState) {
715715
fmt.Fprintf(buf, "<tr valign=top><td align=left>%s</td><td align=center>%s</td><td><pre>%s</pre></td></tr>\n",
716716
html.EscapeString(bs.NameAndBranch()),
717717
status,
718-
bs.HTMLStatusLine())
718+
bs.HTMLStatusTruncated())
719719
}
720720
fmt.Fprintf(buf, "</table>")
721721
w.Write(buf.Bytes())
@@ -809,7 +809,7 @@ func writeStatusHeader(w http.ResponseWriter, st *buildStatus) {
809809
}
810810
if len(st.events) > 0 {
811811
io.WriteString(w, "\nEvents:\n")
812-
st.writeEventsLocked(w, false)
812+
st.writeEventsLocked(w, false, 0)
813813
}
814814
io.WriteString(w, "\nBuild log:\n")
815815
workaroundFlush(w)
@@ -1818,6 +1818,34 @@ func (st *buildStatus) checkDep(ctx context.Context, dep string) (have bool, err
18181818

18191819
var errSkipBuildDueToDeps = errors.New("build was skipped due to missing deps")
18201820

1821+
func (st *buildStatus) getBuildlet() (*buildlet.Client, error) {
1822+
schedItem := &SchedItem{
1823+
HostType: st.conf.HostType,
1824+
IsTry: st.trySet != nil,
1825+
BuilderRev: st.BuilderRev,
1826+
}
1827+
st.mu.Lock()
1828+
st.schedItem = schedItem
1829+
st.mu.Unlock()
1830+
1831+
sp := st.CreateSpan("get_buildlet")
1832+
bc, err := sched.GetBuildlet(st.ctx, st, schedItem)
1833+
sp.Done(err)
1834+
if err != nil {
1835+
err = fmt.Errorf("failed to get a buildlet: %v", err)
1836+
go st.reportErr(err)
1837+
return nil, err
1838+
}
1839+
atomic.StoreInt32(&st.hasBuildlet, 1)
1840+
1841+
st.mu.Lock()
1842+
st.bc = bc
1843+
st.mu.Unlock()
1844+
st.LogEventTime("using_buildlet", bc.IPPort())
1845+
1846+
return bc, nil
1847+
}
1848+
18211849
func (st *buildStatus) build() error {
18221850
if deps := st.conf.GoDeps; len(deps) > 0 {
18231851
ctx, cancel := context.WithTimeout(st.ctx, 30*time.Second)
@@ -1854,25 +1882,11 @@ func (st *buildStatus) build() error {
18541882
st.forceSnapshotUsage()
18551883
}
18561884

1857-
sp = st.CreateSpan("get_buildlet")
1858-
bc, err := sched.GetBuildlet(st.ctx, st, &SchedItem{
1859-
HostType: st.conf.HostType,
1860-
IsTry: st.trySet != nil,
1861-
BuilderRev: st.BuilderRev,
1862-
})
1863-
sp.Done(err)
1885+
bc, err := st.getBuildlet()
18641886
if err != nil {
1865-
err = fmt.Errorf("failed to get a buildlet: %v", err)
1866-
go st.reportErr(err)
18671887
return err
18681888
}
1869-
atomic.StoreInt32(&st.hasBuildlet, 1)
18701889
defer bc.Close()
1871-
st.mu.Lock()
1872-
st.bc = bc
1873-
st.mu.Unlock()
1874-
1875-
st.LogEventTime("using_buildlet", bc.IPPort())
18761890

18771891
if st.useSnapshot() {
18781892
sp := st.CreateSpan("write_snapshot_tar")
@@ -1967,6 +1981,8 @@ func (st *buildStatus) build() error {
19671981
return nil
19681982
}
19691983

1984+
// HasBuildlet reports whether the hasBuildlet flag has been set, which
// getBuildlet does once the scheduler has returned a buildlet client.
func (st *buildStatus) HasBuildlet() bool {
	return atomic.LoadInt32(&st.hasBuildlet) != 0
}
1985+
19701986
func (st *buildStatus) isTry() bool { return st.trySet != nil }
19711987

19721988
func (st *buildStatus) isSlowBot() bool {
@@ -3367,8 +3383,8 @@ func (s byTestDuration) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
33673383

33683384
type eventAndTime struct {
33693385
t time.Time
3370-
evt string
3371-
text string
3386+
evt string // "get_source", "make_and_test", "make", etc
3387+
text string // optional detail text
33723388
}
33733389

33743390
// buildStatus is the status of a build.
@@ -3391,6 +3407,7 @@ type buildStatus struct {
33913407
hasBenchResults bool // set by runTests, may only be used when build() returns.
33923408

33933409
mu sync.Mutex // guards following
3410+
schedItem *SchedItem // for the initial buildlet (ignoring helpers for now)
33943411
logURL string // if non-empty, permanent URL of log
33953412
bc *buildlet.Client // nil initially, until pool returns one
33963413
done time.Time // finished running
@@ -3544,8 +3561,9 @@ func (st *buildStatus) hasEvent(event string) bool {
35443561

35453562
// HTMLStatusLine returns the single-line HTML summary shown in the
35463563
// main page's lists of pending and recently completed builds.
3547-
func (st *buildStatus) HTMLStatusLine() template.HTML { return st.htmlStatusLine(true) }
3548-
func (st *buildStatus) HTMLStatusLine_done() template.HTML { return st.htmlStatusLine(false) }
3564+
func (st *buildStatus) HTMLStatusLine() template.HTML { return st.htmlStatus(singleLine) }
3565+
func (st *buildStatus) HTMLStatusTruncated() template.HTML { return st.htmlStatus(truncated) }
3566+
func (st *buildStatus) HTMLStatus() template.HTML { return st.htmlStatus(full) }
35493567

35503568
func strSliceTo(s string, n int) string {
35513569
if len(s) <= n {
@@ -3554,7 +3572,15 @@ func strSliceTo(s string, n int) string {
35543572
return s[:n]
35553573
}
35563574

3557-
func (st *buildStatus) htmlStatusLine(full bool) template.HTML {
3575+
// buildStatusDetail selects how much of a build's status htmlStatus
// renders. The values are ordered from least to most detail, and
// htmlStatus relies on that ordering (detail > singleLine).
type buildStatusDetail int

const (
	// singleLine renders only the summary line: no buildlet, no events.
	singleLine buildStatusDetail = iota

	// truncated adds the buildlet and the last few event lines.
	truncated

	// full adds the buildlet and all event lines.
	full
)
3582+
3583+
func (st *buildStatus) htmlStatus(detail buildStatusDetail) template.HTML {
35583584
if st == nil {
35593585
return "[nil]"
35603586
}
@@ -3588,13 +3614,17 @@ func (st *buildStatus) htmlStatusLine(full bool) template.HTML {
35883614

35893615
var state string
35903616
if st.done.IsZero() {
3591-
state = "running"
3617+
if st.HasBuildlet() {
3618+
state = "running"
3619+
} else {
3620+
state = "waiting_for_machine"
3621+
}
35923622
} else if st.succeeded {
35933623
state = "succeeded"
35943624
} else {
35953625
state = "<font color='#700000'>failed</font>"
35963626
}
3597-
if full {
3627+
if detail > singleLine {
35983628
fmt.Fprintf(&buf, "; <a href='%s'>%s</a>; %s", html.EscapeString(st.logsURLLocked()), state, html.EscapeString(st.bc.String()))
35993629
} else {
36003630
fmt.Fprintf(&buf, "; <a href='%s'>%s</a>", html.EscapeString(st.logsURLLocked()), state)
@@ -3605,9 +3635,13 @@ func (st *buildStatus) htmlStatusLine(full bool) template.HTML {
36053635
t = st.startTime
36063636
}
36073637
fmt.Fprintf(&buf, ", %v ago", time.Since(t).Round(time.Second))
3608-
if full {
3638+
if detail > singleLine {
36093639
buf.WriteByte('\n')
3610-
st.writeEventsLocked(&buf, true)
3640+
lastLines := 0
3641+
if detail == truncated {
3642+
lastLines = 3
3643+
}
3644+
st.writeEventsLocked(&buf, true, lastLines)
36113645
}
36123646
return template.HTML(buf.String())
36133647
}
@@ -3633,10 +3667,20 @@ func (st *buildStatus) logsURLLocked() string {
36333667
}
36343668

36353669
// st.mu must be held.
3636-
func (st *buildStatus) writeEventsLocked(w io.Writer, htmlMode bool) {
3637-
var lastT time.Time
3638-
for _, evt := range st.events {
3639-
lastT = evt.t
3670+
// If numLines is greater than zero, it's the number of final lines to truncate to.
3671+
func (st *buildStatus) writeEventsLocked(w io.Writer, htmlMode bool, numLines int) {
3672+
startAt := 0
3673+
if numLines > 0 {
3674+
startAt = len(st.events) - numLines
3675+
if startAt > 0 {
3676+
io.WriteString(w, "...\n")
3677+
} else {
3678+
startAt = 0
3679+
}
3680+
}
3681+
3682+
for i := startAt; i < len(st.events); i++ {
3683+
evt := st.events[i]
36403684
e := evt.evt
36413685
text := evt.text
36423686
if htmlMode {
@@ -3648,8 +3692,9 @@ func (st *buildStatus) writeEventsLocked(w io.Writer, htmlMode bool) {
36483692
}
36493693
fmt.Fprintf(w, " %v %s %s\n", evt.t.Format(time.RFC3339), e, text)
36503694
}
3651-
if st.isRunningLocked() {
3652-
fmt.Fprintf(w, " %7s (now)\n", fmt.Sprintf("+%0.1fs", time.Since(lastT).Seconds()))
3695+
if st.isRunningLocked() && len(st.events) > 0 {
3696+
lastEvt := st.events[len(st.events)-1]
3697+
fmt.Fprintf(w, " %7s (now)\n", fmt.Sprintf("+%0.1fs", time.Since(lastEvt.t).Seconds()))
36533698
}
36543699
}
36553700

cmd/coordinator/sched.go

+67-1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111
"context"
1212
"fmt"
1313
"log"
14+
"sort"
1415
"sync"
1516
"time"
1617

@@ -33,7 +34,7 @@ var useScheduler = false
3334
// for buildlets, starts the creation of buildlets from BuildletPools,
3435
// and prioritizes which callers get them first when they're ready.
3536
type Scheduler struct {
36-
// mu guards waiting and hostsCreating.
37+
// mu guards the following fields.
3738
mu sync.Mutex
3839

3940
// waiting contains all the set of callers who are waiting for
@@ -43,6 +44,8 @@ type Scheduler struct {
4344
// hostsCreating is the number of GetBuildlet calls currently in flight
4445
// to each hostType's respective buildlet pool.
4546
hostsCreating map[string]int // hostType -> count
47+
48+
lastProgress map[string]time.Time // hostType -> time last delivered buildlet
4649
}
4750

4851
// A getBuildletResult is a buildlet that was just created and is up and
@@ -61,6 +64,7 @@ func NewScheduler() *Scheduler {
6164
s := &Scheduler{
6265
hostsCreating: make(map[string]int),
6366
waiting: make(map[string]map[*SchedItem]bool),
67+
lastProgress: make(map[string]time.Time),
6468
}
6569
return s
6670
}
@@ -83,6 +87,10 @@ func (s *Scheduler) matchBuildlet(res getBuildletResult) {
8387
case ch := <-waiter.wantRes:
8488
// Normal happy case. Something gets its buildlet.
8589
ch <- res.Client
90+
91+
s.mu.Lock()
92+
s.lastProgress[res.HostType] = time.Now()
93+
s.mu.Unlock()
8694
return
8795
case <-waiter.ctxDone:
8896
// Waiter went away in the tiny window between
@@ -205,6 +213,64 @@ func (s *Scheduler) hasWaiter(si *SchedItem) bool {
205213
return s.waiting[si.HostType][si]
206214
}
207215

216+
type schedulerWaitingState struct {
217+
Count int
218+
Newest time.Duration
219+
Oldest time.Duration
220+
}
221+
222+
func (st *schedulerWaitingState) add(si *SchedItem) {
223+
st.Count++
224+
age := time.Since(si.requestTime).Round(time.Second)
225+
if st.Newest == 0 || age < st.Newest {
226+
st.Newest = age
227+
}
228+
if st.Oldest == 0 || age > st.Oldest {
229+
st.Oldest = age
230+
}
231+
}
232+
233+
type schedulerHostState struct {
234+
HostType string
235+
LastProgress time.Duration
236+
Total schedulerWaitingState
237+
Gomote schedulerWaitingState
238+
Try schedulerWaitingState
239+
Regular schedulerWaitingState
240+
}
241+
242+
type schedulerState struct {
243+
HostTypes []schedulerHostState
244+
}
245+
246+
func (s *Scheduler) state() (st schedulerState) {
247+
s.mu.Lock()
248+
defer s.mu.Unlock()
249+
250+
for hostType, m := range s.waiting {
251+
if len(m) == 0 {
252+
continue
253+
}
254+
var hst schedulerHostState
255+
hst.HostType = hostType
256+
hst.LastProgress = time.Since(s.lastProgress[hostType]).Round(time.Second)
257+
for si := range m {
258+
hst.Total.add(si)
259+
if si.IsGomote {
260+
hst.Gomote.add(si)
261+
} else if si.IsTry {
262+
hst.Try.add(si)
263+
} else {
264+
hst.Regular.add(si)
265+
}
266+
}
267+
st.HostTypes = append(st.HostTypes, hst)
268+
}
269+
270+
sort.Slice(st.HostTypes, func(i, j int) bool { return st.HostTypes[i].HostType < st.HostTypes[j].HostType })
271+
return st
272+
}
273+
208274
// schedLess reports whether scheduled item ia is "less" (more
209275
// important) than scheduled item ib.
210276
func schedLess(ia, ib *SchedItem) bool {

cmd/coordinator/status.go

+20-5
Original file line numberDiff line numberDiff line change
@@ -574,7 +574,7 @@ func handleStatus(w http.ResponseWriter, r *http.Request) {
574574
HealthCheckers: healthCheckers,
575575
}
576576
for _, st := range status {
577-
if atomic.LoadInt32(&st.hasBuildlet) != 0 {
577+
if st.HasBuildlet() {
578578
data.ActiveBuilds++
579579
data.Active = append(data.Active, st)
580580
if st.conf.IsReverse() {
@@ -626,6 +626,8 @@ func handleStatus(w http.ResponseWriter, r *http.Request) {
626626
reversePool.WriteHTMLStatus(&buf)
627627
data.ReversePoolStatus = template.HTML(buf.String())
628628

629+
data.SchedState = sched.state()
630+
629631
buf.Reset()
630632
if err := statusTmpl.Execute(&buf, data); err != nil {
631633
http.Error(w, err.Error(), http.StatusInternalServerError)
@@ -688,6 +690,7 @@ type statusData struct {
688690
KubePoolStatus template.HTML // TODO: embed template
689691
ReversePoolStatus template.HTML // TODO: embed template
690692
RemoteBuildlets template.HTML
693+
SchedState schedulerState
691694
DiskFree string
692695
Version string
693696
HealthCheckers []*healthChecker
@@ -729,7 +732,19 @@ var statusTmpl = template.Must(template.New("status").Parse(`
729732
{{.Trybots}}
730733
{{end}}
731734
732-
<h2 id=remote>Remote buildlets <a href='#remote'>¶</a></h3>
735+
<h2 id=sched>Scheduler State <a href='#sched'>¶</a></h2>
736+
<ul>
737+
{{range .SchedState.HostTypes}}
738+
<li><b>{{.HostType}}</b>: {{.Total.Count}} waiting (oldest {{.Total.Oldest}}, newest {{.Total.Newest}}, progress {{.LastProgress}})
739+
{{if or .Gomote.Count .Try.Count}}<ul>
740+
{{if .Gomote.Count}}<li>gomote: {{.Gomote.Count}} (oldest {{.Gomote.Oldest}}, newest {{.Gomote.Newest}})</li>{{end}}
741+
{{if .Try.Count}}<li>try: {{.Try.Count}} (oldest {{.Try.Oldest}}, newest {{.Try.Newest}})</li>{{end}}
742+
</ul>{{end}}
743+
</li>
744+
{{end}}
745+
</ul>
746+
747+
<h2 id=remote>Remote buildlets <a href='#remote'>¶</a></h2>
733748
{{.RemoteBuildlets}}
734749
735750
<h2 id=pools>Buildlet pools <a href='#pools'>¶</a></h2>
@@ -742,21 +757,21 @@ var statusTmpl = template.Must(template.New("status").Parse(`
742757
<h2 id=active>Active builds <a href='#active'>¶</a></h2>
743758
<ul>
744759
{{range .Active}}
745-
<li><pre>{{.HTMLStatusLine}}</pre></li>
760+
<li><pre>{{.HTMLStatusTruncated}}</pre></li>
746761
{{end}}
747762
</ul>
748763
749764
<h2 id=pending>Pending builds <a href='#pending'>¶</a></h2>
750765
<ul>
751766
{{range .Pending}}
752-
<li><pre>{{.HTMLStatusLine}}</pre></li>
767+
<li><span>{{.HTMLStatusLine}}</span></li>
753768
{{end}}
754769
</ul>
755770
756771
<h2 id=completed>Recently completed <a href='#completed'>¶</a></h2>
757772
<ul>
758773
{{range .Recent}}
759-
<li><span>{{.HTMLStatusLine_done}}</span></li>
774+
<li><span>{{.HTMLStatusLine}}</span></li>
760775
{{end}}
761776
</ul>
762777

0 commit comments

Comments
 (0)