diff --git a/core/backend_intf.ml b/core/backend_intf.ml index 94171ac4df..30303a57e1 100644 --- a/core/backend_intf.ml +++ b/core/backend_intf.ml @@ -19,9 +19,11 @@ module type S = sig -> debug_print_perf_commands:bool -> subcommand:Subcommand.t -> when_to_snapshot:When_to_snapshot.t + -> multi_snapshot:bool -> trace_mode:Trace_mode.t -> timer_resolution:Timer_resolution.t -> record_dir:string + -> intel_pt:bool (** Select whether to use Intel PT or sampling. *) -> Pid.t list -> t Deferred.Or_error.t @@ -39,6 +41,7 @@ module type S = sig : ?perf_maps:Perf_map.Table.t -> debug_print_perf_commands:bool -> record_dir:string + -> intel_pt:bool -> Decode_opts.t -> Decode_result.t Deferred.Or_error.t end diff --git a/core/decode_result.ml b/core/decode_result.ml index 579f957507..020fecf891 100644 --- a/core/decode_result.ml +++ b/core/decode_result.ml @@ -2,6 +2,6 @@ open Core open Async type t = - { events : Event.t Pipe.Reader.t + { events : Event.t Pipe.Reader.t list ; close_result : unit Or_error.t Deferred.t } diff --git a/core/decode_result.mli b/core/decode_result.mli index a263f8c2dc..0fea7bf2e2 100644 --- a/core/decode_result.mli +++ b/core/decode_result.mli @@ -1,9 +1,11 @@ open Core open Async -(* The result of decoding events is a pipe of those events, and a deferred reason why the - decoder exited. *) +(* The result of decoding events are pipe(s) of those events, and a deferred + reason why the decoder exited. Multiple pipes will be returned if multiple + snapshots were captured and processed, but they are returned in chronological + order. Waiting on [close_result] will wait on pipes in sequential order. *) type t = - { events : Event.t Pipe.Reader.t + { events : Event.t Pipe.Reader.t list ; close_result : unit Or_error.t Deferred.t } diff --git a/core/event.ml b/core/event.ml index b90fe63f77..dd915d42a0 100644 --- a/core/event.ml +++ b/core/event.ml @@ -38,30 +38,24 @@ module Location = struct end module Ok = struct - module Trace = struct + module Data = struct type t = - { thread : Thread.t - ; time : Time_ns.Span.t - ; trace_state_change : Trace_state_change.t option [@sexp.option] - ; kind : Kind.t option [@sexp.option] - ; src : Location.t - ; dst : Location.t - } - [@@deriving sexp] - end - - module Power = struct - type t = - { thread : Thread.t - ; time : Time_ns.Span.t - ; freq : int - } + | Trace of + { trace_state_change : Trace_state_change.t option [@sexp.option] + ; kind : Kind.t option [@sexp.option] + ; src : Location.t + ; dst : Location.t + } + | Sample of { callstack : Location.t list } + | Power of { freq : int } [@@deriving sexp] end type t = - | Trace of Trace.t - | Power of Power.t + { thread : Thread.t + ; time : Time_ns.Span.t + ; data : Data.t + } [@@deriving sexp] end @@ -82,19 +76,18 @@ type t = (Ok.t, Decode_error.t) Result.t [@@deriving sexp] let thread (t : t) = match t with - | Ok (Trace { thread; _ }) | Ok (Power { thread; _ }) | Error { thread; _ } -> thread + | Ok { thread; _ } | Error { thread; _ } -> thread ;; let time (t : t) = match t with - | Ok (Trace { time; _ }) | Ok (Power { time; _ }) -> Time_ns_unix.Span.Option.some time + | Ok { time; _ } -> Time_ns_unix.Span.Option.some time | Error { time; _ } -> time ;; let change_time (t : t) ~f : t = match t with - | Ok (Trace ({ time; _ } as t)) -> Ok (Trace { t with time = f time }) - | Ok (Power ({ time; _ } as t)) -> Ok (Power { t with time = f time }) + | Ok ({ time; _ } as t) -> Ok { t with time = f time } | Error ({ time; _ } as u) -> (match%optional.Time_ns_unix.Span.Option time with | None -> t diff --git a/core/event.mli b/core/event.mli index 74ff3ac040..af7ba19ac4 100644 --- a/core/event.mli +++ b/core/event.mli @@ -35,30 +35,25 @@ module Location : sig end module Ok : sig - module Trace : sig + module Data : sig type t = - { thread : Thread.t - ; time : Time_ns.Span.t - ; trace_state_change : Trace_state_change.t option - ; kind : Kind.t option - ; src : Location.t - ; dst : Location.t - } - [@@deriving sexp] - end - - module Power : sig - type t = - { thread : Thread.t - ; time : Time_ns.Span.t - ; freq : int - } + | Trace of + { trace_state_change : Trace_state_change.t option + ; kind : Kind.t option + ; src : Location.t + ; dst : Location.t + } (** Represents an event collected from Intel PT. *) + | Sample of { callstack : Location.t list } + (** Represents event collected through sampling. *) + | Power of { freq : int } (** Power event collected by Intel PT. *) [@@deriving sexp] end type t = - | Trace of Trace.t - | Power of Power.t + { thread : Thread.t + ; time : Time_ns.Span.t + ; data : Data.t + } [@@deriving sexp] end diff --git a/core/timer_resolution.ml b/core/timer_resolution.ml index 8344072c4d..60949105fd 100644 --- a/core/timer_resolution.ml +++ b/core/timer_resolution.ml @@ -4,6 +4,7 @@ type t = | Low | Normal | High + | Sample of { freq : int } | Custom of { cyc : bool option [@sexp.option] ; cyc_thresh : int option [@sexp.option] @@ -23,5 +24,5 @@ let param = (Command.Arg_type.create (fun str -> t_of_sexp (Sexp.of_string str)))) ~doc: "RESOLUTION How granular timing information should be, one of Low, Normal, High, \ - or Custom (default: Normal). For more info visit https://magic-trace.org/w/t" + Sample or Custom (default: Normal). For more info: https://magic-trace.org/w/t" ;; diff --git a/core/timer_resolution.mli b/core/timer_resolution.mli index 1f128ecc47..8123dd1dfa 100644 --- a/core/timer_resolution.mli +++ b/core/timer_resolution.mli @@ -6,6 +6,7 @@ type t = | Low | Normal | High + | Sample of { freq : int } (** Used when sampling *) | Custom of { cyc : bool option ; cyc_thresh : int option @@ -13,7 +14,7 @@ type t = ; mtc_period : int option ; noretcomp : bool option ; psb_period : int option - } + } (** Used when running with Intel PT. *) [@@deriving sexp] val param : t Command.Param.t diff --git a/src/perf_capabilities.ml b/src/perf_capabilities.ml index 8281992db6..7c39f100d3 100644 --- a/src/perf_capabilities.ml +++ b/src/perf_capabilities.ml @@ -41,10 +41,13 @@ module Version = struct end let supports_configurable_psb_period () = - let cyc_cap = - In_channel.read_all "/sys/bus/event_source/devices/intel_pt/caps/psb_cyc" - in - String.( = ) cyc_cap "1\n" + try + let cyc_cap = + In_channel.read_all "/sys/bus/event_source/devices/intel_pt/caps/psb_cyc" + in + String.( = ) cyc_cap "1\n" + with + | _ -> false ;; let supports_tracing_kernel () = diff --git a/src/perf_decode.ml b/src/perf_decode.ml new file mode 100644 index 0000000000..9e37b9330d --- /dev/null +++ b/src/perf_decode.ml @@ -0,0 +1,669 @@ +open! Core +open! Async +open! Import + +let saturating_sub_i64 a b = + match Int64.(to_int (a - b)) with + | None -> Int.max_value + | Some offset -> offset +;; + +let int64_of_hex_string str = + (* Bit hacks for fast parsing of hex strings. + * + * Note that in ASCII, ('1' | 'a' | 'A') & 0xF = 1. + * + * So for each character, take the bottom 4 bits, and add 9 if it's + * not a digit. *) + let res = ref 0L in + for i = 0 to String.length str - 1 do + let open Int64 in + let c = of_int (Char.to_int (String.unsafe_get str i)) in + res := (!res lsl 4) lor ((c land 0xFL) + ((c lsr 6) lor ((c lsr 3) land 0x8L))) + done; + !res +;; + +let%test_module _ = + (module struct + open Core + + let check str = Core.print_s ([%sexp_of: Int64.Hex.t] (int64_of_hex_string str)) + + let%expect_test "int64 hex parsing" = + check "fF"; + [%expect {| 0xff |}]; + check "f0f"; + [%expect {| 0xf0f |}]; + check "fA0f"; + [%expect {| 0xfa0f |}]; + check "0"; + [%expect {| 0x0 |}] + ;; + end) +;; + +let ok_perf_sample_line_re = + Re.Perl.re {|^ *([0-9]+)/([0-9]+) +([0-9]+).([0-9]+): *$|} |> Re.compile +;; + +let ok_perf_sample_callstack_entry_re = Re.Perl.re "^\t *([0-9a-f]+) (.*)$" |> Re.compile + +let ok_perf_line_re = + Re.Perl.re + {|^ *([0-9]+)/([0-9]+) +([0-9]+).([0-9]+): +(call|return|tr strt|syscall|sysret|hw int|iret|tr end|tr strt tr end|tr end (?:call|return|syscall|sysret|iret)|jmp|jcc) +([0-9a-f]+) (.*) => +([0-9a-f]+) (.*)$|} + |> Re.compile +;; + +(* This matches exactly the power events which contain either [cbr] or [psb offs]. *) +let ok_perf_power_line_re = + Re.Perl.re + {|^ *([0-9]+)/([0-9]+) +([0-9]+).([0-9]+): +([a-z]*)? +(cbr|psb offs): ([0-9]+ freq: ([0-9]+) MHz)?(.*)$|} + |> Re.compile +;; + +let trace_error_re = + Re.Posix.re + {|^ instruction trace error type [0-9]+ (time ([0-9]+)\.([0-9]+) )?cpu [\-0-9]+ pid ([\-0-9]+) tid ([\-0-9]+) ip (0x[0-9a-fA-F]+|0) code [0-9]+: (.*)$|} + |> Re.compile +;; + +let symbol_and_offset_re = Re.Perl.re {|^(.*)\+(0x[0-9a-f]+)\s+\(.*\)$|} |> Re.compile +let unknown_symbol_dso_re = Re.Perl.re {|^\[unknown\]\s+\((.*)\)|} |> Re.compile + +type classification = + | Trace_error + | Ok_perf_line + | Ok_perf_sample_line + | Ok_perf_power_line + +let classify line = + if String.is_prefix line ~prefix:" instruction trace error" + then Re.Group.all (Re.exec trace_error_re line), Trace_error + else ( + try Re.Group.all (Re.exec ok_perf_line_re line), Ok_perf_line with + | _ -> + (try Re.Group.all (Re.exec ok_perf_power_line_re line), Ok_perf_power_line with + | _ -> Re.Group.all (Re.exec ok_perf_sample_line_re line), Ok_perf_sample_line)) +;; + +let parse_time ~time_hi ~time_lo = + let time_lo = + (* In practice, [time_lo] seems to always be 9 decimal places, but it seems good + to guard against other possibilities. *) + let num_decimal_places = String.length time_lo in + match Ordering.of_int (Int.compare num_decimal_places 9) with + | Less -> Int.of_string time_lo * Int.pow 10 (9 - num_decimal_places) + | Equal -> Int.of_string time_lo + | Greater -> Int.of_string (String.prefix time_lo 9) + in + let time_hi = Int.of_string time_hi in + time_lo + (time_hi * 1_000_000_000) |> Time_ns.Span.of_int_ns +;; + +let parse_symbol_and_offset ?perf_maps pid str ~addr = + match Re.Group.all (Re.exec symbol_and_offset_re str) with + | [| _; symbol; offset |] -> Symbol.From_perf symbol, Int.Hex.of_string offset + | _ | (exception _) -> + let failed = Symbol.Unknown, 0 in + (match perf_maps, pid with + | None, _ | _, None -> + (match Re.Group.all (Re.exec unknown_symbol_dso_re str) with + | [| _; dso |] -> + (* CR-someday tbrindus: ideally, we would subtract the DSO base + offset from [offset] here. *) + Symbol.From_perf [%string "[unknown @ %{addr#Int64.Hex} (%{dso})]"], 0 + | _ | (exception _) -> failed) + | Some perf_map, Some pid -> + (match Perf_map.Table.symbol ~pid perf_map ~addr with + | None -> failed + | Some location -> + (* It's strange that perf isn't resolving these symbols. It says on the tin that + it supports perf map files! *) + let offset = saturating_sub_i64 addr location.start_addr in + From_perf_map location, offset)) +;; + +let maybe_pid_of_string = function + | "0" -> None + | pid -> Some (Pid.of_string pid) +;; + +let trace_error_to_event matches : Event.Decode_error.t = + match matches with + | [| _; _; time_hi; time_lo; pid; tid; ip; message |] -> + let pid = Int.of_string pid in + let tid = Int.of_string tid in + let instruction_pointer = + if String.( = ) ip "0" then None else Some (Int64.Hex.of_string ip) + in + let time = + if String.is_empty time_hi && String.is_empty time_lo + then Time_ns_unix.Span.Option.none + else Time_ns_unix.Span.Option.some (parse_time ~time_hi ~time_lo) + in + { thread = + { pid = (if pid = 0 then None else Some (Pid.of_int pid)) + ; tid = (if tid = 0 then None else Some (Pid.of_int tid)) + } + ; instruction_pointer + ; message + ; time + } + | results -> + raise_s + [%message + "Regex of trace error did not match expected fields" (results : string array)] +;; + +let ok_perf_power_line_to_event matches : Event.Ok.t option = + match matches with + | [| _; pid; tid; time_hi; time_lo; _; kind; _; freq; _ |] -> + let pid = maybe_pid_of_string pid in + let tid = maybe_pid_of_string tid in + let time = parse_time ~time_hi ~time_lo in + (match kind with + | "cbr" -> + (* cbr (core-to-bus ratio) are events which show frequency changes. *) + Some { thread = { pid; tid }; time; data = Power { freq = Int.of_string freq } } + | "psb offs" -> + (* Ignore psb (packet stream boundary) packets *) + None + | _ -> raise_s [%message "Saw unexpected power event" (matches : string array)]) + | results -> + raise_s + [%message + "Regex of perf power event did not match expected fields" (results : string array)] +;; + +let ok_perf_sample_line_to_event ?perf_maps matches lines : Event.Ok.t = + match matches with + | [| _; pid; tid; time_hi; time_lo |] -> + let pid = maybe_pid_of_string pid in + let tid = maybe_pid_of_string tid in + let time = parse_time ~time_hi ~time_lo in + let callstack = + List.map lines ~f:(fun line -> + match Re.Group.all (Re.exec ok_perf_sample_callstack_entry_re line) with + | [| _; instruction_pointer; symbol_and_offset |] -> + (* print_s [%message (instruction_pointer : string) (symbol_and_offset : string)]; *) + let instruction_pointer = int64_of_hex_string instruction_pointer in + let symbol, symbol_offset = + parse_symbol_and_offset + ?perf_maps + pid + symbol_and_offset + ~addr:instruction_pointer + in + (* print_s [%message (symbol : Symbol.t)]; *) + { Event.Location.instruction_pointer; symbol; symbol_offset } + | results -> + raise_s + [%message + "Perf output did not match expected regex when parsing callstack entry." + (results : string array)]) + in + let callstack = List.rev callstack in + { thread = { pid; tid }; time; data = Sample { callstack } } + | results -> + raise_s + [%message "Perf output did not match expected regex." (results : string array)] +;; + +let ok_perf_line_to_event ?perf_maps matches line : Event.Ok.t = + match matches with + | [| _ + ; pid + ; tid + ; time_hi + ; time_lo + ; kind + ; src_instruction_pointer + ; src_symbol_and_offset + ; dst_instruction_pointer + ; dst_symbol_and_offset + |] -> + let pid = maybe_pid_of_string pid in + let tid = maybe_pid_of_string tid in + let time = parse_time ~time_hi ~time_lo in + let src_instruction_pointer = int64_of_hex_string src_instruction_pointer in + let dst_instruction_pointer = int64_of_hex_string dst_instruction_pointer in + let src_symbol, src_symbol_offset = + parse_symbol_and_offset + ?perf_maps + pid + src_symbol_and_offset + ~addr:src_instruction_pointer + in + let dst_symbol, dst_symbol_offset = + parse_symbol_and_offset + ?perf_maps + pid + dst_symbol_and_offset + ~addr:dst_instruction_pointer + in + let starts_trace, kind = + match String.chop_prefix kind ~prefix:"tr strt" with + | None -> false, kind + | Some rest -> true, String.lstrip ~drop:Char.is_whitespace rest + in + let ends_trace, kind = + match String.chop_prefix kind ~prefix:"tr end" with + | None -> false, kind + | Some rest -> true, String.lstrip ~drop:Char.is_whitespace rest + in + let trace_state_change : Trace_state_change.t option = + match starts_trace, ends_trace with + | true, false -> Some Start + | false, true -> Some End + | false, false + (* "tr strt tr end" happens when someone `go run`s ./demo/demo.go. But that + trace is pretty broken for other reasons, so it's hard to say if this is + truly necessary. Regardless, it's slightly more user friendly to show a + broken trace instead of crashing here. *) + | true, true -> None + in + let kind : Event.Kind.t option = + match String.strip kind with + | "call" -> Some Call + | "return" -> Some Return + | "jmp" -> Some Jump + | "jcc" -> Some Jump + | "syscall" -> Some Syscall + | "hw int" -> Some Hardware_interrupt + | "iret" -> Some Iret + | "sysret" -> Some Sysret + | "" -> None + | _ -> + printf "Warning: skipping unrecognized perf output: %s\n%!" line; + None + in + { thread = { pid; tid } + ; time + ; data = + Trace + { trace_state_change + ; kind + ; src = + { instruction_pointer = src_instruction_pointer + ; symbol = src_symbol + ; symbol_offset = src_symbol_offset + } + ; dst = + { instruction_pointer = dst_instruction_pointer + ; symbol = dst_symbol + ; symbol_offset = dst_symbol_offset + } + } + } + | results -> + raise_s + [%message "Regex of expected perf output did not match." (results : string array)] +;; + +let to_event ?perf_maps lines : Event.t option = + try + match lines with + | first_line :: lines -> + (match classify first_line with + | matches, Trace_error -> Some (Error (trace_error_to_event matches)) + | matches, Ok_perf_line -> + Some (Ok (ok_perf_line_to_event ?perf_maps matches first_line)) + | matches, Ok_perf_sample_line -> + Some (Ok (ok_perf_sample_line_to_event ?perf_maps matches lines)) + | matches, Ok_perf_power_line -> + ok_perf_power_line_to_event matches |> Option.map ~f:(fun event -> Ok event)) + | [] -> raise_s [%message "Unexpected line while parsing perf output."] + with + | exn -> + raise_s + [%message + "BUG: exception raised while parsing perf output. Please report this to \ + https://github.com/janestreet/magic-trace/issues/" + (exn : exn) + ~perf_output:(lines : string list)] +;; + +let split_line_pipe pipe : string list Pipe.Reader.t = + let reader, writer = Pipe.create () in + don't_wait_for + (let%bind acc = + Pipe.fold pipe ~init:[] ~f:(fun acc line -> + let should_acc = not String.(line = "") in + let should_write = + String.(line = "") || not (Char.equal (String.get line 0) '\t') + in + let%map () = + if List.length acc > 0 && should_write + then Pipe.write writer (List.rev acc) + else Deferred.return () + in + let prev_acc = if should_write then [] else acc in + if should_acc then line :: prev_acc else prev_acc) + in + let%map () = + if List.length acc > 0 + then Pipe.write writer (List.rev acc) + else Deferred.return () + in + Pipe.close writer); + reader +;; + +let to_events ?perf_maps pipe = + let pipe = split_line_pipe pipe in + (* Every route of filtering on streams in an async way seems to be deprecated, + including converting to pipes which says that the stream creation should be + switched to a pipe creation. Changing Async_shell is out-of-scope, and I also + can't see a reason why filter_map would lead to memory leaks. *) + Pipe.map pipe ~f:(to_event ?perf_maps) |> Pipe.filter_map ~f:Fn.id +;; + +let%test_module _ = + (module struct + open Core + + let check s = + to_event (String.split ~on:'\n' s) |> [%sexp_of: Event.t option] |> print_s + ;; + + let%expect_test "C symbol" = + check + {| 25375/25375 4509191.343298468: call 7f6fce0b71f4 __clock_gettime+0x24 (foo.so) => 7ffd193838e0 __vdso_clock_gettime+0x0 (foo.so)|}; + [%expect + {| + ((Ok + ((thread ((pid (25375)) (tid (25375)))) (time 52d4h33m11.343298468s) + (data + (Trace (kind Call) + (src + ((instruction_pointer 0x7f6fce0b71f4) + (symbol (From_perf __clock_gettime)) (symbol_offset 0x24))) + (dst + ((instruction_pointer 0x7ffd193838e0) + (symbol (From_perf __vdso_clock_gettime)) (symbol_offset 0x0)))))))) |}] + ;; + + let%expect_test "C symbol trace start" = + check + {| 25375/25375 4509191.343298468: tr strt 0 [unknown] (foo.so) => 7f6fce0b71d0 __clock_gettime+0x0 (foo.so)|}; + [%expect + {| + ((Ok + ((thread ((pid (25375)) (tid (25375)))) (time 52d4h33m11.343298468s) + (data + (Trace (trace_state_change Start) + (src + ((instruction_pointer 0x0) + (symbol (From_perf "[unknown @ 0x0 (foo.so)]")) (symbol_offset 0x0))) + (dst + ((instruction_pointer 0x7f6fce0b71d0) + (symbol (From_perf __clock_gettime)) (symbol_offset 0x0)))))))) |}] + ;; + + let%expect_test "C++ symbol" = + check + {| 7166/7166 4512623.871133092: call 9bc6db a::B, a::F, a::F, G::H, a::I>::run+0x1eb (foo.so) => 9f68b0 J::K+0x0 (foo.so)|}; + [%expect + {| + ((Ok + ((thread ((pid (7166)) (tid (7166)))) (time 52d5h30m23.871133092s) + (data + (Trace (kind Call) + (src + ((instruction_pointer 0x9bc6db) + (symbol + (From_perf "a::B, a::F, a::F, G::H, a::I>::run")) + (symbol_offset 0x1eb))) + (dst + ((instruction_pointer 0x9f68b0) + (symbol (From_perf "J::K")) (symbol_offset 0x0)))))))) |}] + ;; + + let%expect_test "OCaml symbol" = + check + {|2017001/2017001 761439.053336670: call 56234f77576b Base.Comparable.=_2352+0xb (foo.so) => 56234f4bc7a0 caml_apply2+0x0 (foo.so)|}; + [%expect + {| + ((Ok + ((thread ((pid (2017001)) (tid (2017001)))) (time 8d19h30m39.05333667s) + (data + (Trace (kind Call) + (src + ((instruction_pointer 0x56234f77576b) + (symbol (From_perf Base.Comparable.=_2352)) (symbol_offset 0xb))) + (dst + ((instruction_pointer 0x56234f4bc7a0) (symbol (From_perf caml_apply2)) + (symbol_offset 0x0)))))))) |}] + ;; + + (* CR-someday wduff: Leaving this concrete example here for when we support this. See my + comment above as well. + + {[ + let%expect_test "Unknown Go symbol" = + check + {|2118573/2118573 770614.599007116: tr strt tr end 0 [unknown] (foo.so) => 4591e1 [unknown] (foo.so)|}; + [%expect] + ;; + ]} + *) + + let%expect_test "manufactured example 1" = + check + {|2017001/2017001 761439.053336670: call 56234f77576b x => +0xb (foo.so) => 56234f4bc7a0 caml_apply2+0x0 (foo.so)|}; + [%expect + {| + ((Ok + ((thread ((pid (2017001)) (tid (2017001)))) (time 8d19h30m39.05333667s) + (data + (Trace (kind Call) + (src + ((instruction_pointer 0x56234f77576b) (symbol (From_perf "x => ")) + (symbol_offset 0xb))) + (dst + ((instruction_pointer 0x56234f4bc7a0) (symbol (From_perf caml_apply2)) + (symbol_offset 0x0)))))))) |}] + ;; + + let%expect_test "manufactured example 2" = + check + {|2017001/2017001 761439.053336670: call 56234f77576b x => +0xb (foo.so) => 56234f4bc7a0 => +0x0 (foo.so)|}; + [%expect + {| + ((Ok + ((thread ((pid (2017001)) (tid (2017001)))) (time 8d19h30m39.05333667s) + (data + (Trace (kind Call) + (src + ((instruction_pointer 0x56234f77576b) (symbol (From_perf "x => ")) + (symbol_offset 0xb))) + (dst + ((instruction_pointer 0x56234f4bc7a0) (symbol (From_perf "=> ")) + (symbol_offset 0x0)))))))) |}] + ;; + + let%expect_test "manufactured example 3" = + check + {|2017001/2017001 761439.053336670: call 56234f77576b + +0xb (foo.so) => 56234f4bc7a0 caml_apply2+0x0 (foo.so)|}; + [%expect + {| + ((Ok + ((thread ((pid (2017001)) (tid (2017001)))) (time 8d19h30m39.05333667s) + (data + (Trace (kind Call) + (src + ((instruction_pointer 0x56234f77576b) (symbol (From_perf "+ ")) + (symbol_offset 0xb))) + (dst + ((instruction_pointer 0x56234f4bc7a0) (symbol (From_perf caml_apply2)) + (symbol_offset 0x0)))))))) |}] + ;; + + let%expect_test "unknown symbol in DSO" = + check + {|2017001/2017001 761439.053336670: call 56234f77576b [unknown] (foo.so) => 56234f4bc7a0 caml_apply2+0x0 (foo.so)|}; + [%expect + {| + ((Ok + ((thread ((pid (2017001)) (tid (2017001)))) (time 8d19h30m39.05333667s) + (data + (Trace (kind Call) + (src + ((instruction_pointer 0x56234f77576b) + (symbol (From_perf "[unknown @ 0x56234f77576b (foo.so)]")) + (symbol_offset 0x0))) + (dst + ((instruction_pointer 0x56234f4bc7a0) (symbol (From_perf caml_apply2)) + (symbol_offset 0x0)))))))) |}] + ;; + + let%expect_test "DSO with spaces in it" = + check + {|2017001/2017001 761439.053336670: call 56234f77576b [unknown] (this is a spaced dso.so) => 56234f4bc7a0 caml_apply2+0x0 (foo.so)|}; + [%expect + {| + ((Ok + ((thread ((pid (2017001)) (tid (2017001)))) (time 8d19h30m39.05333667s) + (data + (Trace (kind Call) + (src + ((instruction_pointer 0x56234f77576b) + (symbol + (From_perf "[unknown @ 0x56234f77576b (this is a spaced dso.so)]")) + (symbol_offset 0x0))) + (dst + ((instruction_pointer 0x56234f4bc7a0) (symbol (From_perf caml_apply2)) + (symbol_offset 0x0)))))))) |}] + ;; + + let%expect_test "decode error with a timestamp" = + check + " instruction trace error type 1 time 47170.086912826 cpu -1 pid 293415 tid \ + 293415 ip 0x7ffff7327730 code 7: Overflow packet"; + [%expect + {| + ((Error + ((thread ((pid (293415)) (tid (293415)))) (time (13h6m10.086912826s)) + (instruction_pointer (0x7ffff7327730)) (message "Overflow packet")))) |}] + ;; + + let%expect_test "decode error without a timestamp" = + check + " instruction trace error type 1 cpu -1 pid 293415 tid 293415 ip 0x7ffff7327730 \ + code 7: Overflow packet"; + [%expect + {| + ((Error + ((thread ((pid (293415)) (tid (293415)))) (time ()) + (instruction_pointer (0x7ffff7327730)) (message "Overflow packet")))) |}] + ;; + + let%expect_test "lost trace data" = + check + " instruction trace error type 1 time 2651115.104731379 cpu -1 pid 1801680 tid \ + 1801680 ip 0 code 8: Lost trace data"; + [%expect + {| + ((Error + ((thread ((pid (1801680)) (tid (1801680)))) (time (30d16h25m15.104731379s)) + (instruction_pointer ()) (message "Lost trace data")))) |}] + ;; + + let%expect_test "never-ending loop" = + check + " instruction trace error type 1 time 406036.830210719 cpu -1 pid 114362 tid \ + 114362 ip 0xffffffffb0999ed5 code 10: Never-ending loop (refer perf config \ + intel-pt.max-loops)"; + [%expect + {| + ((Error + ((thread ((pid (114362)) (tid (114362)))) (time (4d16h47m16.830210719s)) + (instruction_pointer (-0x4f66612b)) + (message "Never-ending loop (refer perf config intel-pt.max-loops)")))) |}] + ;; + + let%expect_test "power event csb" = + check + "2937048/2937048 448556.689322817: cbr: 46 freq: 4606 MHz \ + (159%) 0 0 [unknown] ([unknown])"; + [%expect + {| + ((Ok + ((thread ((pid (2937048)) (tid (2937048)))) (time 5d4h35m56.689322817s) + (data (Power (freq 4606)))))) |}] + ;; + + (* Expected [None] because we ignore these events currently. *) + let%expect_test "power event psb offs" = + check + "2937048/2937048 448556.689403475: psb offs: \ + 0x4be8 0 7f068fbfd330 mmap64+0x50 \ + (/usr/lib64/ld-2.28.so)"; + [%expect {| + () |}] + ;; + + let%expect_test "sampled callstack" = + check + "2060126/2060126 178090.391624068: \n\ + \tffffffff97201100 [unknown] ([unknown])\n\ + \t7f9bd48c1d80 _dl_setup_hash+0x0 (/usr/lib64/ld-2.28.so)\n\ + \t7f9bd48bd18f _dl_map_object_from_fd+0xb8f (/usr/lib64/ld-2.28.so)\n\ + \t7f9bd48bf6b0 _dl_map_object+0x1e0 (/usr/lib64/ld-2.28.so)\n\ + \t7f9bd48ca184 dl_open_worker_begin+0xa4 (/usr/lib64/ld-2.28.so)\n\ + \t7f9bd44521a2 _dl_catch_exception+0x82 (/usr/lib64/libc-2.28.so)\n\ + \t7f9bd48c9ac2 dl_open_worker+0x32 (/usr/lib64/ld-2.28.so)\n\ + \t7f9bd44521a2 _dl_catch_exception+0x82 (/usr/lib64/libc-2.28.so)\n\ + \t7f9bd48c9d0c _dl_open+0xac (/usr/lib64/ld-2.28.so)\n\ + \t7f9bd46ae1e8 dlopen_doit+0x58 (/usr/lib64/libdl-2.28.so)\n\ + \t7f9bd44521a2 _dl_catch_exception+0x82 (/usr/lib64/libc-2.28.so)\n\ + \t7f9bd445225e _dl_catch_error+0x2e (/usr/lib64/libc-2.28.so)\n\ + \t7f9bd46ae964 _dlerror_run+0x64 (/usr/lib64/libdl-2.28.so)\n\ + \t7f9bd46ae285 dlopen@@GLIBC_2.2.5+0x45 (/usr/lib64/libdl-2.28.so)\n\ + \t4008de main+0x87 (/home/alamoreaux/Documents/demo)"; + [%expect + {| + ((Ok + ((thread ((pid (2060126)) (tid (2060126)))) (time 2d1h28m10.391624068s) + (data + (Sample + (callstack + (((instruction_pointer 0x4008de) (symbol (From_perf main)) + (symbol_offset 0x87)) + ((instruction_pointer 0x7f9bd46ae285) + (symbol (From_perf dlopen@@GLIBC_2.2.5)) (symbol_offset 0x45)) + ((instruction_pointer 0x7f9bd46ae964) + (symbol (From_perf _dlerror_run)) (symbol_offset 0x64)) + ((instruction_pointer 0x7f9bd445225e) + (symbol (From_perf _dl_catch_error)) (symbol_offset 0x2e)) + ((instruction_pointer 0x7f9bd44521a2) + (symbol (From_perf _dl_catch_exception)) (symbol_offset 0x82)) + ((instruction_pointer 0x7f9bd46ae1e8) (symbol (From_perf dlopen_doit)) + (symbol_offset 0x58)) + ((instruction_pointer 0x7f9bd48c9d0c) (symbol (From_perf _dl_open)) + (symbol_offset 0xac)) + ((instruction_pointer 0x7f9bd44521a2) + (symbol (From_perf _dl_catch_exception)) (symbol_offset 0x82)) + ((instruction_pointer 0x7f9bd48c9ac2) + (symbol (From_perf dl_open_worker)) (symbol_offset 0x32)) + ((instruction_pointer 0x7f9bd44521a2) + (symbol (From_perf _dl_catch_exception)) (symbol_offset 0x82)) + ((instruction_pointer 0x7f9bd48ca184) + (symbol (From_perf dl_open_worker_begin)) (symbol_offset 0xa4)) + ((instruction_pointer 0x7f9bd48bf6b0) + (symbol (From_perf _dl_map_object)) (symbol_offset 0x1e0)) + ((instruction_pointer 0x7f9bd48bd18f) + (symbol (From_perf _dl_map_object_from_fd)) (symbol_offset 0xb8f)) + ((instruction_pointer 0x7f9bd48c1d80) + (symbol (From_perf _dl_setup_hash)) (symbol_offset 0x0)) + ((instruction_pointer -0x68dfef00) + (symbol (From_perf "[unknown @ -0x68dfef00 ([unknown])]")) + (symbol_offset 0x0))))))))) |}] + ;; + end) +;; + +module For_testing = struct + let to_event = to_event +end diff --git a/src/perf_decode.mli b/src/perf_decode.mli new file mode 100644 index 0000000000..9443dccedd --- /dev/null +++ b/src/perf_decode.mli @@ -0,0 +1,14 @@ +(** Module used for parsing / decoding [perf script] output. *) + +open! Core +open! Async +open! Import + +val to_events + : ?perf_maps:Perf_map.Table.t + -> string Pipe.Reader.t + -> Event.t Pipe.Reader.t + +module For_testing : sig + val to_event : ?perf_maps:Perf_map.Table.t -> string list -> Event.t option +end diff --git a/src/perf_tool_backend.ml b/src/perf_tool_backend.ml index 8c481660a7..868b0c5c85 100644 --- a/src/perf_tool_backend.ml +++ b/src/perf_tool_backend.ml @@ -36,9 +36,9 @@ module Record_opts = struct Pow2_pages.optional_flag "-snapshot-size" ~doc: - " Tunes the amount of data captured in a trace. Default: 4M if root or \ - perf_event_paranoid < 0, 256K otherwise. For more info visit \ - https://magic-trace.org/w/s" + "Tunes the amount of data captured in a trace. Default: 4M if root or \ + perf_event_paranoid < 0, 256K otherwise. When running with sampling, defaults \ + to 512K, but cannot be changed. For more info: https://magic-trace.org/w/s" in { multi_thread; full_execution; snapshot_size } ;; @@ -63,6 +63,38 @@ let perf_fork_exec ?env ~prog ~argv () = | `In_the_parent pid -> pid ;; +let max_sampling_frequency () = + In_channel.read_all "/proc/sys/kernel/perf_event_max_sample_rate" + |> String.rstrip (* Strip off newline *) + |> Int.of_string +;; + +(* This checks if pdcm flag is present in lscpu. This is necessary for LBR to work. + Although I couldn't ascertain that it is also sufficient. *) +let supports_lbr () = + let open Deferred.Or_error in + let%bind.Deferred lscpu_result = Process.run ~prog:"lscpu" ~args:[] () in + match lscpu_result with + | Error _ -> error_string "Unable to execute lscpu." + | Ok result -> + let parse_error = error_string "Unable to determine LBR support from lscpu." in + let flag_re = Re.Perl.re {|^Flags:\s+(\S.*)$|} |> Re.compile in + let lines = String.split ~on:'\n' result in + let flags = + Core.List.find_map lines ~f:(fun line -> + try + match Re.Group.all (Re.exec flag_re line) with + | [| _; flags |] -> Some (String.split ~on:' ' flags) + | _ -> None + with + | _ -> None) + in + (match flags with + | Some flags -> + return (Core.List.exists flags ~f:(fun flag -> String.(flag = "pdcm"))) + | None -> parse_error) +;; + module Recording = struct type t = { pid : Pid.t @@ -75,10 +107,15 @@ module Recording = struct | Userspace_and_kernel -> "uk" ;; - let perf_intel_pt_config_of_timer_resolution : Timer_resolution.t -> string = function - | Low -> "" - | Normal -> "cyc=1,cyc_thresh=1,mtc_period=0" - | High -> "cyc=1,cyc_thresh=1,mtc_period=0,noretcomp=1" + let perf_intel_pt_config_of_timer_resolution + : Timer_resolution.t -> string Deferred.Or_error.t + = function + | Low -> Deferred.Or_error.return "" + | Normal -> Deferred.Or_error.return "cyc=1,cyc_thresh=1,mtc_period=0" + | High -> Deferred.Or_error.return "cyc=1,cyc_thresh=1,mtc_period=0,noretcomp=1" + | Sample _ -> + Deferred.Or_error.error_string + "Timer resolution passed can only be used with sampling." | Custom { cyc; cyc_thresh; mtc; mtc_period; noretcomp; psb_period } -> let make_config key = function | None -> None @@ -93,6 +130,18 @@ module Recording = struct ] |> List.filter_opt |> String.concat ~sep:"," + |> Deferred.Or_error.return + ;; + + let init_record_dir record_dir = + Core_unix.mkdir_p record_dir; + Sys.readdir record_dir + |> Deferred.bind + ~f: + (Deferred.Array.iter ~f:(fun file -> + if String.is_prefix file ~prefix:"perf.data" + then Sys.remove (record_dir ^/ file) + else Deferred.return ())) ;; let attach_and_record @@ -100,11 +149,14 @@ module Recording = struct ~debug_print_perf_commands ~(subcommand : Subcommand.t) ~(when_to_snapshot : When_to_snapshot.t) + ~multi_snapshot ~(trace_mode : Trace_mode.t) ~(timer_resolution : Timer_resolution.t) ~record_dir + ~intel_pt pids = + let%bind () = init_record_dir record_dir in let%bind capabilities = Perf_capabilities.detect_exn () in let%bind.Deferred.Or_error () = match trace_mode, Perf_capabilities.(do_intersect capabilities kernel_tracing) with @@ -138,23 +190,54 @@ module Recording = struct | true -> [ "-p" ] in let pid_opt = [ List.map pids ~f:Pid.to_string |> String.concat ~sep:"," ] in - let ev_arg = - let timer_resolution : Timer_resolution.t = - match - ( timer_resolution - , Perf_capabilities.(do_intersect capabilities configurable_psb_period) ) - with - | (Normal | High), false -> - Core.eprintf - "Warning: This machine has an older generation processor, timing granularity \ - will be ~1us instead of ~10ns. Consider using a newer machine.\n\ - %!"; - Low - | _, _ -> timer_resolution - in - let intel_pt_config = perf_intel_pt_config_of_timer_resolution timer_resolution in + let%bind.Deferred.Or_error freq_opts = + let open Deferred.Or_error in + match intel_pt with + | true -> return [] + | false -> + (match timer_resolution with + | Low -> return [ "-F"; "100" ] + | Normal -> return [] (* Perf defaults to 1000Hz already. *) + | High -> return [ "-F"; Int.to_string (max_sampling_frequency ()) ] + | Sample { freq } -> return [ "-F"; Int.to_string freq ] + | Custom _ -> + error_string "Timer resolution passed can only be used with Intel PT.") + in + let%bind.Deferred.Or_error callgraph_opts = + let%map.Deferred.Or_error lbr = supports_lbr () in + match intel_pt with + | true -> [] + | false -> + [ "--call-graph" + ; (match lbr with + | false -> + "dwarf" + (* Choose to default to dwarf if lbr not present. 'fp' is also an option, but still doesn't seem to behave correctly even with [-fno-omit-frame-pointers]. *) + | true -> "lbr") + ] + in + let%bind.Deferred.Or_error ev_arg = let selector = perf_selector_of_trace_mode trace_mode in - [%string "--event=intel_pt/%{intel_pt_config}/%{selector}"] + match intel_pt with + | true -> + let timer_resolution : Timer_resolution.t = + match + ( timer_resolution + , Perf_capabilities.(do_intersect capabilities configurable_psb_period) ) + with + | (Normal | High), false -> + Core.eprintf + "Warning: This machine has an older generation processor, timing \ + granularity will be ~1us instead of ~10ns. Consider using a newer machine.\n\ + %!"; + Low + | _, _ -> timer_resolution + in + let%map.Deferred.Or_error intel_pt_config = + perf_intel_pt_config_of_timer_resolution timer_resolution + in + [%string "--event=intel_pt/%{intel_pt_config}/%{selector}"] + | false -> Deferred.Or_error.return [%string "--event=cycles:%{selector}"] in let kcore_opts = match trace_mode, Perf_capabilities.(do_intersect capabilities kcore) with @@ -175,9 +258,14 @@ module Recording = struct [] in let snapshot_size_opt = - match snapshot_size with - | None -> [] - | Some snapshot_size -> [ [%string "-m,%{Pow2_pages.num_pages snapshot_size#Int}"] ] + match snapshot_size, intel_pt with + | Some snapshot_size, true -> + [ [%string "-m,%{Pow2_pages.num_pages snapshot_size#Int}"] ] + | Some _, false -> + Core.eprintf + "Warning: -snapshot-size is ignored when not running with Intel PT.\n"; + [] + | _, _ -> [] in let when_to_snapshot = if full_execution @@ -189,19 +277,36 @@ module Recording = struct | Application_calls_a_function _ -> `function_call) in let snapshot_opt = - match when_to_snapshot with - | `never -> [] - | `at_exit `sigint -> [ "--snapshot=e" ] - | `function_call | `at_exit `sigusr2 -> [ "--snapshot" ] + match intel_pt with + | false -> [] + | true -> + (match when_to_snapshot with + | `never -> [] + | `at_exit `sigint -> [ "--snapshot=e" ] + | `function_call | `at_exit `sigusr2 -> [ "--snapshot" ]) + in + let overwrite_opts = + match intel_pt, full_execution with + | false, false -> [ "--overwrite" ] + | _, _ -> [] + in + let switch_opts = + match multi_snapshot with + | true -> [ "--switch-output=signal" ] + | false -> [] in let argv = List.concat [ [ "perf"; "record"; "-o"; record_dir ^/ "perf.data"; ev_arg; "--timestamp" ] + ; overwrite_opts + ; switch_opts ; thread_opts ; pid_opt ; snapshot_opt ; kcore_opts ; snapshot_size_opt + ; callgraph_opts + ; freq_opts ] in if debug_print_perf_commands then Core.printf "%s\n%!" (String.concat ~sep:" " argv); @@ -266,546 +371,58 @@ module Decode_opts = struct let param = Command.Param.return () end -module Perf_line = struct - let report_itraces = "bep" - let report_fields = "pid,tid,time,flags,ip,addr,sym,symoff,synth,dso" - - let saturating_sub_i64 a b = - match Int64.(to_int (a - b)) with - | None -> Int.max_value - | Some offset -> offset - ;; - - let int64_of_hex_string str = - (* Bit hacks for fast parsing of hex strings. - * - * Note that in ASCII, ('1' | 'a' | 'A') & 0xF = 1. - * - * So for each character, take the bottom 4 bits, and add 9 if it's - * not a digit. *) - let res = ref 0L in - for i = 0 to String.length str - 1 do - let open Int64 in - let c = of_int (Char.to_int (String.unsafe_get str i)) in - res := (!res lsl 4) lor ((c land 0xFL) + ((c lsr 6) lor ((c lsr 3) land 0x8L))) - done; - !res - ;; - - let%test_module _ = - (module struct - open Core - - let check str = Core.print_s ([%sexp_of: Int64.Hex.t] (int64_of_hex_string str)) - - let%expect_test "int64 hex parsing" = - check "fF"; - [%expect {| 0xff |}]; - check "f0f"; - [%expect {| 0xf0f |}]; - check "fA0f"; - [%expect {| 0xfa0f |}]; - check "0"; - [%expect {| 0x0 |}] - ;; - end) - ;; - - let ok_perf_line_re = - Re.Perl.re - {|^ *([0-9]+)/([0-9]+) +([0-9]+).([0-9]+): +(call|return|tr strt|syscall|sysret|hw int|iret|tr end|tr strt tr end|tr end (?:call|return|syscall|sysret|iret)|jmp|jcc) +([0-9a-f]+) (.*) => +([0-9a-f]+) (.*)$|} - |> Re.compile - ;; - - (* This matches exactly the power events which contain either [cbr] or [psb offs]. *) - let ok_perf_power_line_re = - Re.Perl.re - {|^ *([0-9]+)/([0-9]+) +([0-9]+).([0-9]+): +([a-z]*)? +(cbr|psb offs): ([0-9]+ freq: ([0-9]+) MHz)?(.*)$|} - |> Re.compile - ;; - - let trace_error_re = - Re.Posix.re - {|^ instruction trace error type [0-9]+ (time ([0-9]+)\.([0-9]+) )?cpu [\-0-9]+ pid ([\-0-9]+) tid ([\-0-9]+) ip (0x[0-9a-fA-F]+|0) code [0-9]+: (.*)$|} - |> Re.compile - ;; - - let symbol_and_offset_re = Re.Perl.re {|^(.*)\+(0x[0-9a-f]+)\s+\(.*\)$|} |> Re.compile - let unknown_symbol_dso_re = Re.Perl.re {|^\[unknown\]\s+\((.*)\)|} |> Re.compile - - type classification = - | Trace_error - | Ok_perf_line - | Ok_perf_power_line - - let classify line = - if String.is_prefix line ~prefix:" instruction trace error" - then Re.Group.all (Re.exec trace_error_re line), Trace_error - else ( - try Re.Group.all (Re.exec ok_perf_line_re line), Ok_perf_line with - | _ -> Re.Group.all (Re.exec ok_perf_power_line_re line), Ok_perf_power_line) - ;; - - let parse_time ~time_hi ~time_lo = - let time_lo = - (* In practice, [time_lo] seems to always be 9 decimal places, but it seems good - to guard against other possibilities. *) - let num_decimal_places = String.length time_lo in - match Ordering.of_int (Int.compare num_decimal_places 9) with - | Less -> Int.of_string time_lo * Int.pow 10 (9 - num_decimal_places) - | Equal -> Int.of_string time_lo - | Greater -> Int.of_string (String.prefix time_lo 9) - in - let time_hi = Int.of_string time_hi in - time_lo + (time_hi * 1_000_000_000) |> Time_ns.Span.of_int_ns - ;; - - let maybe_pid_of_string = function - | "0" -> None - | pid -> Some (Pid.of_string pid) - ;; - - let trace_error_to_event matches : Event.Decode_error.t = - match matches with - | [| _; _; time_hi; time_lo; pid; tid; ip; message |] -> - let pid = maybe_pid_of_string pid in - let tid = maybe_pid_of_string tid in - let instruction_pointer = - if String.( = ) ip "0" then None else Some (Int64.Hex.of_string ip) - in - let time = - if String.is_empty time_hi && String.is_empty time_lo - then Time_ns_unix.Span.Option.none - else Time_ns_unix.Span.Option.some (parse_time ~time_hi ~time_lo) - in - { thread = { pid; tid }; instruction_pointer; message; time } - | results -> - raise_s - [%message - "Regex of trace error did not match expected fields" (results : string array)] - ;; - - let ok_perf_power_line_to_event matches : Event.Ok.t option = - match matches with - | [| _; pid; tid; time_hi; time_lo; _; kind; _; freq; _ |] -> - let pid = maybe_pid_of_string pid in - let tid = maybe_pid_of_string tid in - let time = parse_time ~time_hi ~time_lo in - (match kind with - | "cbr" -> - (* cbr (core-to-bus ratio) are events which show frequency changes. *) - Some (Power { thread = { pid; tid }; time; freq = Int.of_string freq }) - | "psb offs" -> - (* Ignore psb (packet stream boundary) packets *) - None - | _ -> raise_s [%message "Saw unexpected power event" (matches : string array)]) - | results -> - raise_s - [%message - "Regex of perf power event did not match expected fields" - (results : string array)] - ;; - - let ok_perf_line_to_event ?perf_maps matches line : Event.Ok.t = - match matches with - | [| _ - ; pid - ; tid - ; time_hi - ; time_lo - ; kind - ; src_instruction_pointer - ; src_symbol_and_offset - ; dst_instruction_pointer - ; dst_symbol_and_offset - |] -> - let pid = Int.of_string pid in - let tid = Int.of_string tid in - let time = parse_time ~time_hi ~time_lo in - let src_instruction_pointer = int64_of_hex_string src_instruction_pointer in - let dst_instruction_pointer = int64_of_hex_string dst_instruction_pointer in - let parse_symbol_and_offset str ~addr = - match Re.Group.all (Re.exec symbol_and_offset_re str) with - | [| _; symbol; offset |] -> Symbol.From_perf symbol, Int.Hex.of_string offset - | _ | (exception _) -> - let failed = Symbol.Unknown, 0 in - (match perf_maps with - | None -> - (match Re.Group.all (Re.exec unknown_symbol_dso_re str) with - | [| _; dso |] -> - (* CR-someday tbrindus: ideally, we would subtract the DSO base - offset from [offset] here. *) - Symbol.From_perf [%string "[unknown @ %{addr#Int64.Hex} (%{dso})]"], 0 - | _ | (exception _) -> failed) - | Some perf_map -> - (match Perf_map.Table.symbol ~pid:(Pid.of_int pid) perf_map ~addr with - | None -> failed - | Some location -> - (* It's strange that perf isn't resolving these symbols. It says on the tin that - it supports perf map files! *) - let offset = saturating_sub_i64 addr location.start_addr in - From_perf_map location, offset)) - in - let src_symbol, src_symbol_offset = - parse_symbol_and_offset src_symbol_and_offset ~addr:src_instruction_pointer - in - let dst_symbol, dst_symbol_offset = - parse_symbol_and_offset dst_symbol_and_offset ~addr:dst_instruction_pointer - in - let starts_trace, kind = - match String.chop_prefix kind ~prefix:"tr strt" with - | None -> false, kind - | Some rest -> true, String.lstrip ~drop:Char.is_whitespace rest - in - let ends_trace, kind = - match String.chop_prefix kind ~prefix:"tr end" with - | None -> false, kind - | Some rest -> true, String.lstrip ~drop:Char.is_whitespace rest - in - let trace_state_change : Trace_state_change.t option = - match starts_trace, ends_trace with - | true, false -> Some Start - | false, true -> Some End - | false, false - (* "tr strt tr end" happens when someone `go run`s ./demo/demo.go. But that - trace is pretty broken for other reasons, so it's hard to say if this is - truly necessary. Regardless, it's slightly more user friendly to show a - broken trace instead of crashing here. *) - | true, true -> None - in - let kind : Event.Kind.t option = - match String.strip kind with - | "call" -> Some Call - | "return" -> Some Return - | "jmp" -> Some Jump - | "jcc" -> Some Jump - | "syscall" -> Some Syscall - | "hw int" -> Some Hardware_interrupt - | "iret" -> Some Iret - | "sysret" -> Some Sysret - | "" -> None - | _ -> - printf "Warning: skipping unrecognized perf output: %s\n%!" line; - None - in - Trace - { thread = - { pid = (if pid = 0 then None else Some (Pid.of_int pid)) - ; tid = (if tid = 0 then None else Some (Pid.of_int tid)) - } - ; time - ; trace_state_change - ; kind - ; src = - { instruction_pointer = src_instruction_pointer - ; symbol = src_symbol - ; symbol_offset = src_symbol_offset - } - ; dst = - { instruction_pointer = dst_instruction_pointer - ; symbol = dst_symbol - ; symbol_offset = dst_symbol_offset - } - } - | results -> - raise_s - [%message "Regex of expected perf output did not match." (results : string array)] - ;; - - let to_event ?perf_maps line : Event.t option = - try - match classify line with - | matches, Trace_error -> Some (Error (trace_error_to_event matches)) - | matches, Ok_perf_line -> Some (Ok (ok_perf_line_to_event matches ?perf_maps line)) - | matches, Ok_perf_power_line -> - ok_perf_power_line_to_event matches |> Option.map ~f:(fun event -> Ok event) - with - | exn -> - raise_s - [%message - "BUG: exception raised while parsing perf output. Please report this to \ - https://github.com/janestreet/magic-trace/issues/" - (exn : exn) - ~perf_output:(line : string)] - ;; - - let%test_module _ = - (module struct - open Core - - let check s = to_event s |> [%sexp_of: Event.t option] |> print_s +let report_itraces = "bep" - let%expect_test "C symbol" = - check - {| 25375/25375 4509191.343298468: call 7f6fce0b71f4 __clock_gettime+0x24 (foo.so) => 7ffd193838e0 __vdso_clock_gettime+0x0 (foo.so)|}; - [%expect - {| - ((Ok - (Trace - ((thread ((pid (25375)) (tid (25375)))) (time 52d4h33m11.343298468s) - (kind Call) - (src - ((instruction_pointer 0x7f6fce0b71f4) - (symbol (From_perf __clock_gettime)) (symbol_offset 0x24))) - (dst - ((instruction_pointer 0x7ffd193838e0) - (symbol (From_perf __vdso_clock_gettime)) (symbol_offset 0x0))))))) |}] - ;; - - let%expect_test "C symbol trace start" = - check - {| 25375/25375 4509191.343298468: tr strt 0 [unknown] (foo.so) => 7f6fce0b71d0 __clock_gettime+0x0 (foo.so)|}; - [%expect - {| - ((Ok - (Trace - ((thread ((pid (25375)) (tid (25375)))) (time 52d4h33m11.343298468s) - (trace_state_change Start) - (src - ((instruction_pointer 0x0) - (symbol (From_perf "[unknown @ 0x0 (foo.so)]")) (symbol_offset 0x0))) - (dst - ((instruction_pointer 0x7f6fce0b71d0) - (symbol (From_perf __clock_gettime)) (symbol_offset 0x0))))))) |}] - ;; - - let%expect_test "C++ symbol" = - check - {| 7166/7166 4512623.871133092: call 9bc6db a::B, a::F, a::F, G::H, a::I>::run+0x1eb (foo.so) => 9f68b0 J::K+0x0 (foo.so)|}; - [%expect - {| - ((Ok - (Trace - ((thread ((pid (7166)) (tid (7166)))) (time 52d5h30m23.871133092s) - (kind Call) - (src - ((instruction_pointer 0x9bc6db) - (symbol - (From_perf "a::B, a::F, a::F, G::H, a::I>::run")) - (symbol_offset 0x1eb))) - (dst - ((instruction_pointer 0x9f68b0) - (symbol (From_perf "J::K")) (symbol_offset 0x0))))))) |}] - ;; - - let%expect_test "OCaml symbol" = - check - {|2017001/2017001 761439.053336670: call 56234f77576b Base.Comparable.=_2352+0xb (foo.so) => 56234f4bc7a0 caml_apply2+0x0 (foo.so)|}; - [%expect - {| - ((Ok - (Trace - ((thread ((pid (2017001)) (tid (2017001)))) (time 8d19h30m39.05333667s) - (kind Call) - (src - ((instruction_pointer 0x56234f77576b) - (symbol (From_perf Base.Comparable.=_2352)) (symbol_offset 0xb))) - (dst - ((instruction_pointer 0x56234f4bc7a0) (symbol (From_perf caml_apply2)) - (symbol_offset 0x0))))))) |}] - ;; - - (* CR-someday wduff: Leaving this concrete example here for when we support this. See my - comment above as well. - - {[ - let%expect_test "Unknown Go symbol" = - check - {|2118573/2118573 770614.599007116: tr strt tr end 0 [unknown] (foo.so) => 4591e1 [unknown] (foo.so)|}; - [%expect] - ;; - ]} - *) - - let%expect_test "manufactured example 1" = - check - {|2017001/2017001 761439.053336670: call 56234f77576b x => +0xb (foo.so) => 56234f4bc7a0 caml_apply2+0x0 (foo.so)|}; - [%expect - {| - ((Ok - (Trace - ((thread ((pid (2017001)) (tid (2017001)))) (time 8d19h30m39.05333667s) - (kind Call) - (src - ((instruction_pointer 0x56234f77576b) (symbol (From_perf "x => ")) - (symbol_offset 0xb))) - (dst - ((instruction_pointer 0x56234f4bc7a0) (symbol (From_perf caml_apply2)) - (symbol_offset 0x0))))))) |}] - ;; - - let%expect_test "manufactured example 2" = - check - {|2017001/2017001 761439.053336670: call 56234f77576b x => +0xb (foo.so) => 56234f4bc7a0 => +0x0 (foo.so)|}; - [%expect - {| - ((Ok - (Trace - ((thread ((pid (2017001)) (tid (2017001)))) (time 8d19h30m39.05333667s) - (kind Call) - (src - ((instruction_pointer 0x56234f77576b) (symbol (From_perf "x => ")) - (symbol_offset 0xb))) - (dst - ((instruction_pointer 0x56234f4bc7a0) (symbol (From_perf "=> ")) - (symbol_offset 0x0))))))) |}] - ;; - - let%expect_test "manufactured example 3" = - check - {|2017001/2017001 761439.053336670: call 56234f77576b + +0xb (foo.so) => 56234f4bc7a0 caml_apply2+0x0 (foo.so)|}; - [%expect - {| - ((Ok - (Trace - ((thread ((pid (2017001)) (tid (2017001)))) (time 8d19h30m39.05333667s) - (kind Call) - (src - ((instruction_pointer 0x56234f77576b) (symbol (From_perf "+ ")) - (symbol_offset 0xb))) - (dst - ((instruction_pointer 0x56234f4bc7a0) (symbol (From_perf caml_apply2)) - (symbol_offset 0x0))))))) |}] - ;; - - let%expect_test "unknown symbol in DSO" = - check - {|2017001/2017001 761439.053336670: call 56234f77576b [unknown] (foo.so) => 56234f4bc7a0 caml_apply2+0x0 (foo.so)|}; - [%expect - {| - ((Ok - (Trace - ((thread ((pid (2017001)) (tid (2017001)))) (time 8d19h30m39.05333667s) - (kind Call) - (src - ((instruction_pointer 0x56234f77576b) - (symbol (From_perf "[unknown @ 0x56234f77576b (foo.so)]")) - (symbol_offset 0x0))) - (dst - ((instruction_pointer 0x56234f4bc7a0) (symbol (From_perf caml_apply2)) - (symbol_offset 0x0))))))) |}] - ;; - - let%expect_test "DSO with spaces in it" = - check - {|2017001/2017001 761439.053336670: call 56234f77576b [unknown] (this is a spaced dso.so) => 56234f4bc7a0 caml_apply2+0x0 (foo.so)|}; - [%expect - {| - ((Ok - (Trace - ((thread ((pid (2017001)) (tid (2017001)))) (time 8d19h30m39.05333667s) - (kind Call) - (src - ((instruction_pointer 0x56234f77576b) - (symbol - (From_perf "[unknown @ 0x56234f77576b (this is a spaced dso.so)]")) - (symbol_offset 0x0))) - (dst - ((instruction_pointer 0x56234f4bc7a0) (symbol (From_perf caml_apply2)) - (symbol_offset 0x0))))))) |}] - ;; - - let%expect_test "decode error with a timestamp" = - check - " instruction trace error type 1 time 47170.086912826 cpu -1 pid 293415 tid \ - 293415 ip 0x7ffff7327730 code 7: Overflow packet"; - [%expect - {| - ((Error - ((thread ((pid (293415)) (tid (293415)))) (time (13h6m10.086912826s)) - (instruction_pointer (0x7ffff7327730)) (message "Overflow packet")))) |}] - ;; - - let%expect_test "decode error without a timestamp" = - check - " instruction trace error type 1 cpu -1 pid 293415 tid 293415 ip \ - 0x7ffff7327730 code 7: Overflow packet"; - [%expect - {| - ((Error - ((thread ((pid (293415)) (tid (293415)))) (time ()) - (instruction_pointer (0x7ffff7327730)) (message "Overflow packet")))) |}] - ;; - - let%expect_test "lost trace data" = - check - " instruction trace error type 1 time 2651115.104731379 cpu -1 pid 1801680 tid \ - 1801680 ip 0 code 8: Lost trace data"; - [%expect - {| - ((Error - ((thread ((pid (1801680)) (tid (1801680)))) (time (30d16h25m15.104731379s)) - (instruction_pointer ()) (message "Lost trace data")))) |}] - ;; - - let%expect_test "never-ending loop" = - check - " instruction trace error type 1 time 406036.830210719 cpu -1 pid 114362 tid \ - 114362 ip 0xffffffffb0999ed5 code 10: Never-ending loop (refer perf config \ - intel-pt.max-loops)"; - [%expect - {| - ((Error - ((thread ((pid (114362)) (tid (114362)))) (time (4d16h47m16.830210719s)) - (instruction_pointer (-0x4f66612b)) - (message "Never-ending loop (refer perf config intel-pt.max-loops)")))) |}] - ;; - - let%expect_test "power event csb" = - check - "2937048/2937048 448556.689322817: cbr: 46 freq: 4606 \ - MHz (159%) 0 0 [unknown] ([unknown])"; - [%expect - {| - ((Ok - (Power - ((thread ((pid (2937048)) (tid (2937048)))) (time 5d4h35m56.689322817s) - (freq 4606))))) |}] - ;; - - (* Expected [None] because we ignore these events currently. *) - let%expect_test "power event psb offs" = - check - "2937048/2937048 448556.689403475: psb offs: \ - 0x4be8 0 7f068fbfd330 mmap64+0x50 \ - (/usr/lib64/ld-2.28.so)"; - [%expect {| () |}] - ;; - end) - ;; -end +let report_fields intel_pt = + if intel_pt + then "pid,tid,time,flags,ip,addr,sym,symoff,synth,dso" + else "pid,tid,time,ip,sym,symoff,dso" +;; -let decode_events ?perf_maps ~debug_print_perf_commands ~record_dir () = - let args = - [ "script" - ; "-i" - ; record_dir ^/ "perf.data" - ; "--ns" - ; [%string "--itrace=%{Perf_line.report_itraces}"] - ; "-F" - ; Perf_line.report_fields - ] +let decode_events ?perf_maps ~debug_print_perf_commands ~record_dir ~intel_pt () = + let%bind files = + Sys.readdir record_dir + >>| Array.to_list + >>| List.filter ~f:(String.is_prefix ~prefix:"perf.data") in - if debug_print_perf_commands - then Core.printf "perf %s\n%!" (String.concat ~sep:" " args); - (* CR-someday tbrindus: this should be switched over to using [perf_fork_exec] to avoid - the [perf script] process from outliving the parent. *) - let%bind perf_script_proc = Process.create_exn ~env:perf_env ~prog:"perf" ~args () in - let line_pipe = Process.stdout perf_script_proc |> Reader.lines in - don't_wait_for - (Reader.transfer - (Process.stderr perf_script_proc) - (Writer.pipe (force Writer.stderr))); - let events = - (* Every route of filtering on streams in an async way seems to be deprecated, - including converting to pipes which says that the stream creation should be - switched to a pipe creation. Changing Async_shell is out-of-scope, and I also - can't see a reason why filter_map would lead to memory leaks. *) - Pipe.map line_pipe ~f:(Perf_line.to_event ?perf_maps) |> Pipe.filter_map ~f:Fn.id + let%map result = + Deferred.List.map files ~f:(fun perf_data_file -> + let args = + [ "script" + ; "-i" + ; record_dir ^/ perf_data_file + ; "--ns" + ; [%string "--itrace=%{report_itraces}"] + ; "-F" + ; report_fields intel_pt + ] + in + if debug_print_perf_commands + then Core.printf "perf %s\n%!" (String.concat ~sep:" " args); + (* CR-someday tbrindus: this should be switched over to using [perf_fork_exec] to avoid + the [perf script] process from outliving the parent. *) + let%map perf_script_proc = + Process.create_exn ~env:perf_env ~prog:"perf" ~args () + in + let line_pipe = Process.stdout perf_script_proc |> Reader.lines in + don't_wait_for + (Reader.transfer + (Process.stderr perf_script_proc) + (Writer.pipe (force Writer.stderr))); + let events = Perf_decode.to_events ?perf_maps line_pipe in + let close_result = + let%map exit_or_signal = Process.wait perf_script_proc in + perf_exit_to_or_error exit_or_signal + in + events, close_result) in + let events = List.map result ~f:(fun (events, _close_result) -> events) in + (* Force [close_result] to wait on [Pipe.t]s in order.*) let close_result = - let%map exit_or_signal = Process.wait perf_script_proc in - perf_exit_to_or_error exit_or_signal + List.map result ~f:(fun (_events, close_result) -> close_result) + |> Deferred.List.fold ~init:(Ok ()) ~f:(fun acc close_result -> + let%bind.Deferred.Or_error () = close_result in + Deferred.return acc) in - Ok { Decode_result.events; close_result } |> Deferred.return + Ok { Decode_result.events; close_result } ;; diff --git a/src/perf_tool_backend.mli b/src/perf_tool_backend.mli index d0842da0e2..d5f34b6fee 100644 --- a/src/perf_tool_backend.mli +++ b/src/perf_tool_backend.mli @@ -2,7 +2,3 @@ open! Import include Backend_intf.S - -module Perf_line : sig - val to_event : ?perf_maps:Perf_map.Table.t -> string -> Event.t option -end diff --git a/src/trace.ml b/src/trace.ml index e4f64155bc..222ee27ce6 100644 --- a/src/trace.ml +++ b/src/trace.ml @@ -22,6 +22,18 @@ let supports_command command = let supports_fzf = supports_command "fzf" let supports_perf = supports_command "perf" +let should_use_intel_pt use_sampling = + match use_sampling with + | true -> false + | false -> + (match Core_unix.access "/sys/bus/event_source/devices/intel_pt" [ `Exists ] with + | Ok () -> true + | Error _ -> + Core.printf + "Intel PT support not found. Magic trace will continue and use sampling instead."; + false) +;; + let check_for_perf () = if force supports_perf then return (Ok ()) @@ -45,20 +57,6 @@ let create_elf ~executable ~(when_to_snapshot : When_to_snapshot.t) = | Magic_trace_or_the_application_terminates, _ | _, Some _ -> return (Ok elf) ;; -(* Other parts of the process would fail after this without IPT, but by checking directly - we can give a more helpful error message regardless of backend. *) -let check_for_processor_trace_support () = - match Core_unix.access "/sys/bus/event_source/devices/intel_pt" [ `Exists ] with - | Ok () -> return (Ok ()) - | Error _ -> - Deferred.Or_error.error_string - "Error: This machine doesn't support Intel Processor Trace, which is a hardware \ - feature essential for magic-trace to work.\n\ - This may be because it's a virtual machine or it's a physical machine that isn't \ - new enough or uses an AMD processor.\n\ - Try again on a physical Intel machine." -;; - let debug_flag flag = if Env_vars.debug then flag else Command.Param.return false let debug_print_perf_commands = @@ -79,10 +77,10 @@ let write_trace_from_events let { Decode_result.events; close_result } = decode_result in (* Normalize to earliest event = 0 to avoid Perfetto rounding issues *) let%bind.Deferred earliest_time = + let events = List.hd_exn events in let%map.Deferred _wait_for_first = Pipe.values_available events in match Pipe.peek events with - | Some (Ok (Trace earliest)) -> earliest.time - | Some (Ok (Power earliest)) -> earliest.time + | Some (Ok earliest) -> earliest.time | None | Some (Error _) -> Time_ns.Span.zero in let trace = @@ -92,12 +90,13 @@ let write_trace_from_events Tracing.Trace.Expert.create ~base_time:(Some base_time) writer in let events = - if print_events - then - Pipe.map events ~f:(fun (event : Event.t) -> - Core.print_s ~mach:() (Event.sexp_of_t event); - event) - else events + List.map events ~f:(fun events -> + if print_events + then + Pipe.map events ~f:(fun (event : Event.t) -> + Core.print_s ~mach:() (Event.sexp_of_t event); + event) + else events) in let writer = Trace_writer.create @@ -109,8 +108,22 @@ let write_trace_from_events ~annotate_inferred_start_times:Env_vars.debug trace in - let process_event ev = Trace_writer.write_event writer ev in - let%bind () = Pipe.iter_without_pushback events ~f:process_event in + let last_index = ref 0 in + let process_event index ev = + (* When processing a new snapshot, clear all [Trace_writer] data in order to + avoid sharing callstacks, start times, etc. *) + if index > 0 && not (index = !last_index) + then ( + match%optional.Time_ns_unix.Span.Option Event.time ev with + | None -> Trace_writer.end_of_trace writer + | Some to_time -> Trace_writer.end_of_trace ~to_time writer); + last_index := index; + Trace_writer.write_event writer ev + in + let%bind () = + Deferred.List.iteri events ~f:(fun index events -> + Pipe.iter_without_pushback events ~f:(process_event index)) + in Trace_writer.end_of_trace writer; Tracing.Trace.close trace; close_result @@ -118,10 +131,11 @@ let write_trace_from_events let write_event_sexps writer decode_result = let { Decode_result.events; close_result } = decode_result in - Writer.write_line writer "(V1 ("; + Writer.write_line writer "(V3 ("; let%bind () = - Pipe.iter_without_pushback events ~f:(fun (event : Event.t) -> - Writer.write_sexp ~terminate_with:Newline writer (Event.sexp_of_t event)) + Deferred.List.iter events ~f:(fun events -> + Pipe.iter_without_pushback events ~f:(fun (event : Event.t) -> + Writer.write_sexp ~terminate_with:Newline writer (Event.sexp_of_t event))) in Writer.write_line writer "))"; close_result @@ -148,6 +162,7 @@ module Make_commands (Backend : Backend_intf.S) = struct ~trace_mode ~debug_print_perf_commands ~record_dir + ~intel_pt { Decode_opts.output_config; decode_opts; print_events } = Core.eprintf "[ Decoding, this takes a while... ]\n%!"; @@ -161,6 +176,7 @@ module Make_commands (Backend : Backend_intf.S) = struct decode_opts ~debug_print_perf_commands ~record_dir + ~intel_pt in let%bind () = write_event_sexps writer decode_result in return ()) @@ -192,6 +208,7 @@ module Make_commands (Backend : Backend_intf.S) = struct decode_opts ~debug_print_perf_commands ~record_dir + ~intel_pt in let%bind () = write_trace_from_events @@ -215,6 +232,7 @@ module Make_commands (Backend : Backend_intf.S) = struct ; executable : string ; trace_mode : Trace_mode.t ; timer_resolution : Timer_resolution.t + ; use_sampling : bool } end @@ -227,7 +245,14 @@ module Make_commands (Backend : Backend_intf.S) = struct } end - let attach (opts : Record_opts.t) ~elf ~debug_print_perf_commands ~subcommand pids = + let attach + (opts : Record_opts.t) + ~elf + ~debug_print_perf_commands + ~subcommand + ~intel_pt + pids + = Process_info.read_all_proc_info (); let head_pid = List.hd_exn pids in let%bind.Deferred.Or_error snap_loc = @@ -241,6 +266,8 @@ module Make_commands (Backend : Backend_intf.S) = struct match which_function with | Use_fzf_to_select_one -> let all_symbols = Elf.all_symbols elf in + let symbols = List.map all_symbols ~f:(fun (x, _) -> x) in + print_s [%message "" (symbols : string list)]; if force supports_fzf then ( match%bind.Deferred.Or_error Fzf.pick_one (Assoc all_symbols) with @@ -265,9 +292,11 @@ module Make_commands (Backend : Backend_intf.S) = struct ~debug_print_perf_commands ~subcommand ~when_to_snapshot:opts.when_to_snapshot + ~multi_snapshot:opts.multi_snapshot ~trace_mode:opts.trace_mode ~timer_resolution:opts.timer_resolution ~record_dir:opts.record_dir + ~intel_pt pids in let done_ivar = Ivar.create () in @@ -340,11 +369,11 @@ module Make_commands (Backend : Backend_intf.S) = struct return (Ok ()) ;; - let run_and_record record_opts ~elf ~debug_print_perf_commands ~prog ~argv = + let run_and_record record_opts ~elf ~debug_print_perf_commands ~prog ~argv ~intel_pt = let open Deferred.Or_error.Let_syntax in let pid = Ptrace.fork_exec_stopped ~prog ~argv () in let%bind attachment = - attach record_opts ~elf ~debug_print_perf_commands ~subcommand:Run [ pid ] + attach record_opts ~elf ~debug_print_perf_commands ~subcommand:Run ~intel_pt [ pid ] in Ptrace.resume pid; (* Forward ^C to the child, unless it has already exited. *) @@ -377,9 +406,9 @@ module Make_commands (Backend : Backend_intf.S) = struct return pid ;; - let attach_and_record record_opts ~elf ~debug_print_perf_commands pids = + let attach_and_record record_opts ~elf ~debug_print_perf_commands ~intel_pt pids = let%bind.Deferred.Or_error attachment = - attach record_opts ~elf ~debug_print_perf_commands ~subcommand:Attach pids + attach record_opts ~elf ~debug_print_perf_commands ~subcommand:Attach ~intel_pt pids in let { Attachment.done_ivar; _ } = attachment in let stop = Ivar.read done_ivar in @@ -421,7 +450,15 @@ module Make_commands (Backend : Backend_intf.S) = struct files may crash the trace viewer." and trace_mode = Trace_mode.param and timer_resolution = Timer_resolution.param - and backend_opts = Backend.Record_opts.param in + and backend_opts = Backend.Record_opts.param + and use_sampling = + flag + "-sampling" + no_arg + ~doc: + "Instead of Intel PT will use perf with sampling if passed. If Intel PT is not \ + available, magic trace will default to this anyway." + in fun ~executable ~f -> let record_dir, cleanup = match record_dir with @@ -443,6 +480,7 @@ module Make_commands (Backend : Backend_intf.S) = struct ; executable ; trace_mode ; timer_resolution + ; use_sampling }) ;; @@ -476,7 +514,6 @@ module Make_commands (Backend : Backend_intf.S) = struct in fun () -> let open Deferred.Or_error.Let_syntax in - let%bind () = check_for_processor_trace_support () in let%bind () = check_for_perf () in let executable = match Shell.which prog with @@ -484,10 +521,11 @@ module Make_commands (Backend : Backend_intf.S) = struct | None -> failwithf "Can't find executable for %s" prog () in record_opt_fn ~executable ~f:(fun opts -> + let intel_pt = should_use_intel_pt opts.use_sampling in let elf = Elf.create opts.executable in let%bind pid = let argv = prog :: List.concat (Option.to_list argv) in - run_and_record opts ~elf ~debug_print_perf_commands ~prog ~argv + run_and_record opts ~elf ~debug_print_perf_commands ~prog ~argv ~intel_pt in let%bind.Deferred perf_maps = Perf_map.Table.load_by_pids [ pid ] in decode_to_trace @@ -496,6 +534,7 @@ module Make_commands (Backend : Backend_intf.S) = struct ~trace_mode:opts.trace_mode ~debug_print_perf_commands ~record_dir:opts.record_dir + ~intel_pt decode_opts)) ;; @@ -566,7 +605,6 @@ module Make_commands (Backend : Backend_intf.S) = struct in fun () -> let open Deferred.Or_error.Let_syntax in - let%bind () = check_for_processor_trace_support () in let%bind () = check_for_perf () in let%bind (pids : Pid.t list) = match pids with @@ -583,10 +621,11 @@ module Make_commands (Backend : Backend_intf.S) = struct |> fun pid -> Core_unix.readlink [%string "/proc/%{pid#Pid}/exe"] in record_opt_fn ~executable ~f:(fun opts -> - let { Record_opts.executable; when_to_snapshot; _ } = opts in + let { Record_opts.executable; when_to_snapshot; use_sampling; _ } = opts in + let intel_pt = should_use_intel_pt use_sampling in let%bind elf = create_elf ~executable ~when_to_snapshot in let%bind () = - attach_and_record opts ~elf ~debug_print_perf_commands pids + attach_and_record opts ~elf ~debug_print_perf_commands ~intel_pt pids in let%bind.Deferred perf_maps = Perf_map.Table.load_by_pids pids in decode_to_trace @@ -595,6 +634,7 @@ module Make_commands (Backend : Backend_intf.S) = struct ~trace_mode:opts.trace_mode ~debug_print_perf_commands ~record_dir:opts.record_dir + ~intel_pt decode_opts))) ;; @@ -631,6 +671,7 @@ module Make_commands (Backend : Backend_intf.S) = struct ~trace_mode ~debug_print_perf_commands ~record_dir + ~intel_pt:true decode_opts) ;; diff --git a/src/trace_writer.ml b/src/trace_writer.ml index 3f2a67cf05..1b554e2d9c 100644 --- a/src/trace_writer.ml +++ b/src/trace_writer.ml @@ -62,8 +62,8 @@ end module Callstack = struct type t = - { stack : Symbol.t Stack.t - ; create_time : Mapped_time.t + { stack : (Symbol.t * Int64.Hex.t) Stack.t + ; mutable create_time : Mapped_time.t } [@@deriving sexp_of] @@ -72,6 +72,19 @@ module Callstack = struct let pop t = Stack.pop t.stack let top t = Stack.top t.stack let is_empty t = Stack.is_empty t.stack + + let how_many_match { stack; create_time = _ } (future_callstack : Event.Location.t list) + = + let zipped_stacks, _ = + List.zip_with_remainder (Stack.to_list stack |> List.rev) future_callstack + in + let ans = + List.take_while zipped_stacks ~f:(fun ((_symbol, (addr : Int64.Hex.t)), location) -> + Int64.(addr = location.instruction_pointer)) + |> List.length + in + ans + ;; end module Thread_info = struct @@ -516,12 +529,12 @@ let create_thread t event = let call t thread_info ~time ~location = let ev = Pending_event.create_call location ~from_untraced:false in add_event t thread_info time ev; - Callstack.push thread_info.callstack location.symbol + Callstack.push thread_info.callstack (location.symbol, location.instruction_pointer) ;; let ret_without_checking_for_go_hacks t (thread_info : _ Thread_info.t) ~time = match Callstack.pop thread_info.callstack with - | Some symbol -> add_event t thread_info time { symbol; kind = Ret } + | Some (symbol, _) -> add_event t thread_info time { symbol; kind = Ret } | None -> (* No known stackframe was popped --- could occur if the start of the snapshot started in the middle of a tracing region *) @@ -594,14 +607,16 @@ end = struct ;; let current_stack_contains_known_gogo_destination (thread_info : _ Thread_info.t) = - Stack.find thread_info.callstack.stack ~f:is_known_gogo_destination |> Option.is_some + Stack.find thread_info.callstack.stack ~f:(fun (symbol, _) -> + is_known_gogo_destination symbol) + |> Option.is_some ;; let rec pop_until_gogo_destination t (thread_info : _ Thread_info.t) ~time = let ret = ret_without_checking_for_go_hacks in match Callstack.top thread_info.callstack with | None -> () - | Some symbol -> + | Some (symbol, _) -> ret t thread_info ~time; (* Return one past the known gogo destination. This hack is necessary because: @@ -629,7 +644,9 @@ end = struct end let ret t (thread_info : _ Thread_info.t) ~time : unit = - let returned_from = Callstack.top thread_info.callstack in + let returned_from = + Callstack.top thread_info.callstack |> Option.map ~f:(fun (symbol, _) -> symbol) + in ret_without_checking_for_go_hacks t thread_info ~time; Go_hacks.ret_track_gogo t thread_info ~time ~returned_from ;; @@ -645,7 +662,7 @@ let check_current_symbol with jumps between functions (e.g. tailcalls, PLT) or returns out of the highest known function, so we have to correct the top of the stack here. *) match Callstack.top thread_info.callstack with - | Some known when not ([%compare.equal: Symbol.t] known location.symbol) -> + | Some (known, _) when not ([%compare.equal: Symbol.t] known location.symbol) -> ret t thread_info ~time; call t thread_info ~time ~location | Some _ -> () @@ -659,7 +676,7 @@ let check_current_symbol time. *) let ev = Pending_event.create_call location ~from_untraced:true in write_pending_event t thread_info thread_info.callstack.create_time ev; - Callstack.push thread_info.callstack location.symbol + Callstack.push thread_info.callstack (location.symbol, location.instruction_pointer) ;; (* OCaml-specific hacks around tracking exception control flow. Supports two @@ -709,7 +726,7 @@ end = struct | With_exception_info _ -> () | Without_exception_info { frames_to_unwind } -> (match Callstack.top callstack with - | Some (From_perf symbol) -> + | Some (From_perf symbol, _) -> (match symbol with | "caml_next_frame_descriptor" -> incr frames_to_unwind | "caml_raise_exn" -> unwind_stack t thread_info ~time ~frames_to_unwind (-2) @@ -789,7 +806,7 @@ let assert_trace_mode t event trace_modes = (event : Event.t)] ;; -let end_of_trace (T t) = +let end_of_trace ?to_time (T t) = (* CR-someday cgaebel: I wish this iteration had a defined order; it'd make magic-trace a little bit more deterministic. *) Hashtbl.iter t.thread_info ~f:(fun thread_info -> @@ -797,7 +814,14 @@ let end_of_trace (T t) = t thread_info ~time:thread_info.last_event_time - ~is_kernel_address:false) + ~is_kernel_address:false; + match to_time with + | Some time -> + let mapped_time = map_time t time in + thread_info.pending_time <- mapped_time; + thread_info.last_event_time <- mapped_time; + thread_info.callstack.create_time <- mapped_time + | None -> ()) ;; (* Write perf_events into a file as a Fuschia trace (stack events). Events should be @@ -821,121 +845,142 @@ let write_event (T t) event = | Some ip -> is_kernel_address ip in end_of_thread t thread_info ~time ~is_kernel_address - | Ok (Power { thread = _; time = _; freq }) -> - write_counter - t - ~thread - ~name:"CPU" - ~time - ~args:Tracing.Trace.Arg.[ "freq (MHz)", Int freq ] - | Ok (Trace event) -> - let { Event.Ok.Trace.thread = _ (* Already used this to look up thread info. *) - ; time = _ (* Already in scope. Also, this time hasn't been [map_time]'d. *) - ; kind - ; trace_state_change - ; src - ; dst - } - = - event - in - Ocaml_hacks.track_executed_pushtraps_and_poptraps_in_range - t - thread_info - ~src - ~dst - ~time; - (match kind, trace_state_change with - | Some Call, (None | Some End) -> call t thread_info ~time ~location:dst - | ( Some (Call | Syscall | Return | Hardware_interrupt | Iret | Sysret | Jump) - , Some Start ) - | Some (Hardware_interrupt | Jump), Some End -> - raise_s - [%message - "BUG: magic-trace devs thought this event was impossible, but you just proved \ - them wrong. Please report this to \ - https://github.com/janestreet/magic-trace/issues/" - (event : Event.Ok.Trace.t)] - | None, Some End -> call t thread_info ~time ~location:Event.Location.untraced - | Some Syscall, Some End -> - (* We should only be getting these under /u *) - assert_trace_mode t outer_event [ Userspace ]; - call t thread_info ~time ~location:Event.Location.syscall - | Some Return, Some End -> call t thread_info ~time ~location:Event.Location.returned - | Some Return, None -> - Ocaml_hacks.ret_track_exn_data t thread_info ~time; - check_current_symbol t thread_info ~time dst - | None, Some Start -> - (* Might get this under /u, /k, and /uk, but we need to handle them all + | Ok + { Event.Ok.thread = _ (* Already used this to look up thread info. *) + ; time = _ (* Already in scope. Also, this time hasn't been [map_time]'d. *) + ; data + } -> + (match data with + | Power { freq } -> + write_counter + t + ~thread + ~name:"CPU" + ~time + ~args:Tracing.Trace.Arg.[ "freq (MHz)", Int freq ] + | Sample { callstack } -> + let how_many_ret = + Stack.length thread_info.callstack.stack + - Callstack.how_many_match thread_info.callstack callstack + in + List.init how_many_ret ~f:Fn.id |> List.iter ~f:(fun _ -> ret t thread_info ~time); + let how_many_call = + List.length callstack - Stack.length thread_info.callstack.stack + in + let calls = List.drop callstack (Stack.length thread_info.callstack.stack) in + let (_ : Event.Location.t list) = + List.init how_many_call ~f:Fn.id + |> List.fold ~init:calls ~f:(fun calls _ -> + match calls with + | head :: tail -> + call t thread_info ~time ~location:head; + tail + | [] -> + raise_s [%message "BUG: Unexpected behavior while parsing callstack."]) + in + () + | Trace { kind; trace_state_change; src; dst } -> + Ocaml_hacks.track_executed_pushtraps_and_poptraps_in_range + t + thread_info + ~src + ~dst + ~time; + (match kind, trace_state_change with + | Some Call, (None | Some End) -> call t thread_info ~time ~location:dst + | ( Some (Call | Syscall | Return | Hardware_interrupt | Iret | Sysret | Jump) + , Some Start ) + | Some (Hardware_interrupt | Jump), Some End -> + raise_s + [%message + "BUG: magic-trace devs thought this event was impossible, but you just \ + proved them wrong. Please report this to \ + https://github.com/janestreet/magic-trace/issues/" + (event : Event.t)] + | None, Some End -> call t thread_info ~time ~location:Event.Location.untraced + | Some Syscall, Some End -> + (* We should only be getting these under /u *) + assert_trace_mode t outer_event [ Userspace ]; + call t thread_info ~time ~location:Event.Location.syscall + | Some Return, Some End -> + call t thread_info ~time ~location:Event.Location.returned + | Some Return, None -> + Ocaml_hacks.ret_track_exn_data t thread_info ~time; + check_current_symbol t thread_info ~time dst + | None, Some Start -> + (* Might get this under /u, /k, and /uk, but we need to handle them all differently. *) - if Trace_mode.equal t.trace_mode Kernel - then ( - (* We're back in the kernel after having been in userspace. We have a + if Trace_mode.equal t.trace_mode Kernel + then ( + (* We're back in the kernel after having been in userspace. We have a brand new stack to work with. [clear_callstack] here should only be clearing the [untraced] frame here pushed by [End (Iret | Sysret)]. *) - clear_callstack t thread_info ~time; - Thread_info.set_callstack_from_addr - thread_info - ~addr:dst.instruction_pointer - ~time) - else if Callstack.is_empty thread_info.callstack - then - (* View stopping tracing always as a call (typically the result of a call + clear_callstack t thread_info ~time; + Thread_info.set_callstack_from_addr + thread_info + ~addr:dst.instruction_pointer + ~time) + else if Callstack.is_empty thread_info.callstack + then + (* View stopping tracing always as a call (typically the result of a call into a special library / linker), with starting tracing again as exiting it. The one exception is the initial start of the trace for that process, when there is no stack and a prior end won't have pushed a synthetic stack frame. *) - call t thread_info ~time ~location:dst - else - (* We don't call [check_current_symbol] here because stops don't change + call t thread_info ~time ~location:dst + else + (* We don't call [check_current_symbol] here because stops don't change the program location in most cases, and when a call to a symbol page faults, the restart after the page fault at the new location would get treated as a tail call if we did call [check_current_symbol]. *) - Ocaml_hacks.ret_track_exn_data t thread_info ~time - | Some ((Syscall | Hardware_interrupt) as kind), None -> - (* We should only be getting [Syscall] these under /uk, but we can get + Ocaml_hacks.ret_track_exn_data t thread_info ~time + | Some ((Syscall | Hardware_interrupt) as kind), None -> + (* We should only be getting [Syscall] these under /uk, but we can get [Hardware_interrupt] under /uk, /k. *) - [ [ Trace_mode.Userspace_and_kernel ] - ; (if [%compare.equal: Event.Kind.t] kind Hardware_interrupt - then [ Kernel ] - else []) - ] - |> List.concat - |> assert_trace_mode t outer_event; - (* A syscall or hardware interrupt can be modelled as operating on a new + [ [ Trace_mode.Userspace_and_kernel ] + ; (if [%compare.equal: Event.Kind.t] kind Hardware_interrupt + then [ Kernel ] + else []) + ] + |> List.concat + |> assert_trace_mode t outer_event; + (* A syscall or hardware interrupt can be modelled as operating on a new stack, and shouldn't be allowed to modify the previous stack. Also, hardware interrupts can occur during syscalls, so we maintain a "stack of callstacks" here. *) - Stack.push thread_info.inactive_callstacks thread_info.callstack; - Thread_info.set_callstack_from_addr thread_info ~addr:dst.instruction_pointer ~time; - call t thread_info ~time ~location:dst - | Some (Iret | Sysret), Some End -> - (* We should only be getting these under /k *) - assert_trace_mode t outer_event [ Kernel ]; - clear_callstack t thread_info ~time; - call t thread_info ~time ~location:Event.Location.untraced - | Some ((Iret | Sysret) as kind), None -> - (* We should only get [Sysret] under /uk, but might get [Iret] under /k as - well (because the kernel can be interrupted). *) - [ [ Trace_mode.Userspace_and_kernel ] - ; (if [%compare.equal: Event.Kind.t] kind Iret then [ Kernel ] else []) - ] - |> List.concat - |> assert_trace_mode t outer_event; - clear_callstack t thread_info ~time; - (match Stack.pop thread_info.inactive_callstacks with - | Some callstack -> thread_info.callstack <- callstack - | None -> + Stack.push thread_info.inactive_callstacks thread_info.callstack; Thread_info.set_callstack_from_addr thread_info ~addr:dst.instruction_pointer ~time; - check_current_symbol t thread_info ~time dst) - | Some Jump, None -> - Ocaml_hacks.check_current_symbol_track_entertraps t thread_info ~time dst - (* (None, _) comes up when perf spews something magic-trace doesn't recognize. + call t thread_info ~time ~location:dst + | Some (Iret | Sysret), Some End -> + (* We should only be getting these under /k *) + assert_trace_mode t outer_event [ Kernel ]; + clear_callstack t thread_info ~time; + call t thread_info ~time ~location:Event.Location.untraced + | Some ((Iret | Sysret) as kind), None -> + (* We should only get [Sysret] under /uk, but might get [Iret] under /k as + well (because the kernel can be interrupted). *) + [ [ Trace_mode.Userspace_and_kernel ] + ; (if [%compare.equal: Event.Kind.t] kind Iret then [ Kernel ] else []) + ] + |> List.concat + |> assert_trace_mode t outer_event; + clear_callstack t thread_info ~time; + (match Stack.pop thread_info.inactive_callstacks with + | Some callstack -> thread_info.callstack <- callstack + | None -> + Thread_info.set_callstack_from_addr + thread_info + ~addr:dst.instruction_pointer + ~time; + check_current_symbol t thread_info ~time dst) + | Some Jump, None -> + Ocaml_hacks.check_current_symbol_track_entertraps t thread_info ~time dst + (* (None, _) comes up when perf spews something magic-trace doesn't recognize. Instead of crashing, ignore it and keep going. *) - | None, _ -> ()); - if !debug then print_s (sexp_of_inner t) + | None, _ -> ()); + if !debug then print_s (sexp_of_inner t)) ;; diff --git a/src/trace_writer.mli b/src/trace_writer.mli index 7d4983ff23..d5a0b70a3a 100644 --- a/src/trace_writer.mli +++ b/src/trace_writer.mli @@ -33,4 +33,8 @@ val create_expert -> t val write_event : t -> Event.t -> unit -val end_of_trace : t -> unit + +(** Updates interal data structures when trace ends. If [to_time] is passed, will + shift to new start time which is useful when writing out multiple snapshots + from perf. *) +val end_of_trace : ?to_time:Time_ns.Span.t -> t -> unit diff --git a/test/perf_script.ml b/test/perf_script.ml index 29a12fb12f..45f573ab78 100644 --- a/test/perf_script.ml +++ b/test/perf_script.ml @@ -74,17 +74,19 @@ let run ?(debug = false) ?ocaml_exception_info ~trace_mode file = in let should_print_perf_line (event : Event.t) = match event with - | Ok (Trace event) -> - (* Most of a trace is just jumps within a single function. Those are basically + | Ok event -> + (match event.data with + | Trace data -> + (* Most of a trace is just jumps within a single function. Those are basically uninteresting to magic-trace, so skip them to keep tests a little cleaner. *) - not ([%compare.equal: Symbol.t] event.src.symbol event.dst.symbol) + not ([%compare.equal: Symbol.t] data.src.symbol data.dst.symbol) + | _ -> true) | Error _ -> true - | Ok (Power _) -> true in List.iter lines ~f:(fun line -> if not (String.is_empty line) then ( - let event = Perf_tool_backend.Perf_line.to_event line |> Option.value_exn in + let event = Perf_decode.For_testing.to_event [ line ] |> Option.value_exn in let event = adjust_event_time event in (* Most of a trace is just jumps within a single function. Those are basically uninteresting to magic-trace, so skip them to keep tests a little cleaner. *) diff --git a/test/test.ml b/test/test.ml index 174465cb5b..514f45ca61 100644 --- a/test/test.ml +++ b/test/test.ml @@ -43,26 +43,28 @@ end = struct |> String.of_char_list) ;; - let random_location () : Event.Location.t = - { instruction_pointer = addr (); symbol = From_perf ""; symbol_offset = offset () } + let random_location ?symbol () : Event.Location.t = + { instruction_pointer = addr () + ; symbol = Option.value symbol ~default:(Symbol.From_perf "") + ; symbol_offset = offset () + } ;; - let random_ok_event () : Event.Ok.Trace.t = - { Event.Ok.Trace.thread + let random_ok_event ?kind ?symbol () : Event.Ok.t = + { thread ; time = time () - ; trace_state_change = None - ; kind = Some Call - ; src = random_location () - ; dst = random_location () + ; data = + Trace + { trace_state_change = None + ; kind = Some (Option.value kind ~default:Event.Kind.Call) + ; src = random_location () + ; dst = random_location ?symbol () + } } ;; let start_recording () = Queue.clear events - - let random_event' kind symbol : Event.t = - let event = random_ok_event () in - Ok (Trace { event with kind = Some kind; dst = { event.dst with symbol } }) - ;; + let random_event' kind symbol : Event.t = Ok (random_ok_event ~kind ~symbol ()) let call () = let symbol = symbol () in @@ -81,14 +83,11 @@ end = struct Queue.enqueue events (Ok - (Trace - { thread - ; time - ; trace_state_change = None - ; kind = Some kind - ; src = loc - ; dst = loc - })) + { thread + ; time + ; data = + Trace { trace_state_change = None; kind = Some kind; src = loc; dst = loc } + }) ;; let ret () = @@ -131,7 +130,9 @@ module With = struct end let dump_using_file events = - let decode_result = { Decode_result.events; close_result = return (Ok ()) } in + let decode_result = + { Decode_result.events = [ events ]; close_result = return (Ok ()) } + in let buf = Iobuf.create ~len:500_000 in let destination = Tracing_zero.Destinations.iobuf_destination buf in let writer = Tracing_zero.Writer.Expert.create ~destination () in