diff --git a/test/runtests.jl b/test/runtests.jl index fc0c174851738..063e3a3305884 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -13,6 +13,9 @@ include("choosetests.jl") include("testenv.jl") include("buildkitetestjson.jl") +const longrunning_delay = parse(Int, get(ENV, "JULIA_TEST_LONGRUNNING_DELAY", "45")) * 60 # minutes +const longrunning_interval = parse(Int, get(ENV, "JULIA_TEST_LONGRUNNING_INTERVAL", "15")) * 60 # minutes + (; tests, net_on, exit_on_error, use_revise, buildroot, seed) = choosetests(ARGS) tests = unique(tests) @@ -188,10 +191,10 @@ cd(@__DIR__) do at = lpad("($wrkr)", name_align - textwidth(name) + 1, " ") lock(print_lock) try - printstyled(name, at, " |", " "^elapsed_align, - "started at $(now())", + printstyled(name, at, " |", " "^elapsed_align, color=:white) + printstyled("started at $(now())", (pid > 0 ? " on pid $pid" : ""), - "\n", color=:white) + "\n", color=:light_black) finally unlock(print_lock) end @@ -228,6 +231,10 @@ cd(@__DIR__) do # Monitor stdin and kill this task on ^C # but don't do this on Windows, because it may deadlock in the kernel running_tests = Dict{String, DateTime}() + + # Track timeout timers for each test + test_timers = Dict{String, Timer}() + if !Sys.iswindows() && isa(stdin, Base.TTY) t = current_task() stdin_monitor = @async begin @@ -262,6 +269,33 @@ cd(@__DIR__) do test = popfirst!(tests) running_tests[test] = now() wrkr = p + + # Create a timer for this test to report long-running status + test_timers[test] = Timer(longrunning_delay, interval=longrunning_interval) do timer + if haskey(running_tests, test) # Check test is still running + start_time = running_tests[test] + elapsed = now() - start_time + elapsed_minutes = elapsed.value ÷ (1000 * 60) + + elapsed_str = if elapsed_minutes >= 60 + hours, mins = divrem(elapsed_minutes, 60) + "$(hours)h $(mins)m" + else + "$(elapsed_minutes)m" + end + + @lock print_lock begin + print(test) + print(lpad("($(wrkr))", name_align - textwidth(test) + 1, " "), " | ") + # Calculate total width of data columns: "Time (s) | GC (s) | GC % | Alloc (MB) | RSS (MB)" + # This is: elapsed_align + 3 + gc_align + 3 + percent_align + 3 + alloc_align + 3 + rss_align + data_width = elapsed_align + gc_align + percent_align + alloc_align + rss_align + 12 # 12 = 4 * " | " + message = "has been running for $(elapsed_str)" + centered_message = lpad(rpad(message, (data_width + textwidth(message)) ÷ 2), data_width) + printstyled(centered_message, "\n", color=:light_black) + end + end + end before = time() resp, duration = try r = remotecall_fetch(@Base.world(runtests, ∞), wrkr, test, test_path(test); seed=seed) @@ -271,6 +305,10 @@ cd(@__DIR__) do Any[CapturedException(e, catch_backtrace())], time() - before end delete!(running_tests, test) + if haskey(test_timers, test) + close(test_timers[test]) + delete!(test_timers, test) + end push!(results, (test, resp, duration)) if length(resp) == 1 print_testworker_errored(test, wrkr, exit_on_error ? nothing : resp[1]) @@ -355,6 +393,9 @@ cd(@__DIR__) do if @isdefined stdin_monitor schedule(stdin_monitor, InterruptException(); error=true) end + if @isdefined test_timers + foreach(close, values(test_timers)) + end end #=