diff --git a/probe_src/probe_frontend/.envrc b/probe_src/probe_frontend/.envrc new file mode 100644 index 00000000..36551f7f --- /dev/null +++ b/probe_src/probe_frontend/.envrc @@ -0,0 +1,3 @@ +use_flake + +export __PROBE_LOG=info diff --git a/probe_src/probe_frontend/Cargo.lock b/probe_src/probe_frontend/Cargo.lock new file mode 100644 index 00000000..04d1a6d1 --- /dev/null +++ b/probe_src/probe_frontend/Cargo.lock @@ -0,0 +1,1346 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "addr2line" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "anstream" +version = "0.6.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "418c75fa768af9c03be99d17643f93f79bbba589895012a80e3452a19ddda15b" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + 
"utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "038dfcf04a5feb68e9c60b21c9625a54c2c0616e79b72b0fd87075a056ae1d1b" + +[[package]] +name = "anstyle-parse" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c03a11a9034d92058ceb6ee011ce58af4a9bf61491aa7e1e59ecd24bd40d22d4" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad186efb764318d35165f1758e7dcef3b10628e26d41a44bc5550652e6804391" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61a38449feb7068f52bb06c12759005cf459ee52bb4adc1d5a7c4322d716fb19" +dependencies = [ + "anstyle", + "windows-sys", +] + +[[package]] +name = "anyhow" +version = "1.0.86" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da" + +[[package]] +name = "autocfg" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" + +[[package]] +name = "backtrace" +version = "0.3.71" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26b05800d2e817c8b3b4b54abd461726265fa9789ae34330622f2db9ee696f9d" +dependencies = [ + "addr2line", + "cc", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", +] + +[[package]] +name = "bindgen" +version = "0.69.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a00dc851838a2120612785d195287475a3ac45514741da670b735818822129a0" +dependencies = [ + "bitflags 2.6.0", + "cexpr", + "clang-sys", + "itertools", + "lazy_static", + "lazycell", + "log", + "prettyplease", + 
"proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex", + "syn", + "which", +] + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bitflags" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" + +[[package]] +name = "bumpalo" +version = "3.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" + +[[package]] +name = "cc" +version = "1.0.101" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac367972e516d45567c7eafc73d24e1c193dcf200a8d94e9db7b3d38b349572d" + +[[package]] +name = "cexpr" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" +dependencies = [ + "nom", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "chrono" +version = "0.4.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" +dependencies = [ + "android-tzdata", + "iana-time-zone", + "js-sys", + "num-traits", + "wasm-bindgen", + "windows-targets", +] + +[[package]] +name = "clang-sys" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" +dependencies = [ + "glob", + "libc", + "libloading 0.8.4", +] + +[[package]] +name = "clap" +version = "4.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "5db83dced34638ad474f39f250d7fea9598bdd239eaced1bdf45d597da0f433f" +dependencies = [ + "clap_builder", +] + +[[package]] +name = "clap_builder" +version = "4.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7e204572485eb3fbf28f871612191521df159bc3e15a9f5064c66dba3a8c05f" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_lex" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b82cf0babdbd58558212896d1a4272303a57bdb245c2bf1147185fb45640e70" + +[[package]] +name = "color-eyre" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55146f5e46f237f7423d74111267d4597b59b0dad0ffaf7303bce9945d843ad5" +dependencies = [ + "backtrace", + "color-spantrace", + "eyre", + "indenter", + "once_cell", + "owo-colors", + "tracing-error", +] + +[[package]] +name = "color-spantrace" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd6be1b2a7e382e2b98b43b2adcca6bb0e465af0bdd38123873ae61eb17a72c2" +dependencies = [ + "once_cell", + "owo-colors", + "tracing-core", + "tracing-error", +] + +[[package]] +name = "colorchoice" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b6a852b24ab71dffc585bcb46eaf7959d175cb865a7152e35b348d1b2960422" + +[[package]] +name = "core-foundation-sys" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" + +[[package]] +name = "crc32fast" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" + +[[package]] +name = "darling" +version = "0.20.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83b2eb4d90d12bdda5ed17de686c2acb4c57914f8f921b8da7e112b5a36f3fe1" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.20.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "622687fe0bac72a04e5599029151f5796111b90f1baaa9b544d807a5e31cd120" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn", +] + +[[package]] +name = "darling_macro" +version = "0.20.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "733cabb43482b1a1b53eee8583c2b9e8684d592215ea83efd305dd31bc2f0178" +dependencies = [ + "darling_core", + "quote", + "syn", +] + +[[package]] +name = "either" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" + +[[package]] +name = "env_filter" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a009aa4810eb158359dda09d0c87378e4bbb89b5a801f016885a4707ba24f7ea" +dependencies = [ + "log", + "regex", +] + +[[package]] +name = "env_logger" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"38b35839ba51819680ba087cd351788c9a3c476841207e0b8cee0b04722343b9" +dependencies = [ + "anstream", + "anstyle", + "env_filter", + "humantime", + "log", +] + +[[package]] +name = "errno" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f639046355ee4f37944e44f60642c6f3a7efa3cf6b78c78a0d989a8ce6c396a1" +dependencies = [ + "errno-dragonfly", + "libc", + "winapi", +] + +[[package]] +name = "errno" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" +dependencies = [ + "libc", + "windows-sys", +] + +[[package]] +name = "errno-dragonfly" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf" +dependencies = [ + "cc", + "libc", +] + +[[package]] +name = "exec" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "886b70328cba8871bfc025858e1de4be16b1d5088f2ba50b57816f4210672615" +dependencies = [ + "errno 0.2.8", + "libc", +] + +[[package]] +name = "eyre" +version = "0.6.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7cd915d99f24784cdc19fd37ef22b97e3ff0ae756c7e492e9fbfe897d61e2aec" +dependencies = [ + "indenter", + "once_cell", +] + +[[package]] +name = "filetime" +version = "0.2.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ee447700ac8aa0b2f2bd7bc4462ad686ba06baa6727ac149a2d6277f0d240fd" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall 0.4.1", + "windows-sys", +] + +[[package]] +name = "flate2" +version = "1.0.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f54427cfd1c7829e2a139fcefea601bf088ebca651d2bf53ebc600eac295dae" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "getrandom" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "gimli" +version = "0.28.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" + +[[package]] +name = "glob" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" + +[[package]] +name = "home" +version = "0.5.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "humantime" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" + +[[package]] +name = "iana-time-zone" +version = "0.1.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7ffbb5a1b541ea2561f8c41c087286cc091e21e556a4f09a8f6cbf17b69b141" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + +[[package]] +name = "indenter" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce23b50ad8242c51a442f3ff322d56b02f08852c77e4c0b4d3fd684abc89c683" + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8478577c03552c21db0e2724ffb8986a5ce7af88107e6be5d2ee6e158c12800" + +[[package]] +name = "itertools" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" + +[[package]] +name = "js-sys" +version = "0.3.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" +dependencies = [ + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "lazycell" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" + +[[package]] +name = "libc" +version = "0.2.155" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" + +[[package]] +name = "libloading" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f" +dependencies = [ + "cfg-if", + "winapi", +] + +[[package]] 
+name = "libloading" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e310b3a6b5907f99202fcdb4960ff45b93735d7c7d96b760fcff8db2dc0e103d" +dependencies = [ + "cfg-if", + "windows-targets", +] + +[[package]] +name = "linux-raw-sys" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" + +[[package]] +name = "lock_api" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" + +[[package]] +name = "machine-info" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d0bcde250f7927612edb0807ada4ad1d92915d9632d917df9bf696e74095dce" +dependencies = [ + "anyhow", + "log", + "nvml-wrapper", + "serde", + "sysinfo", +] + +[[package]] +name = "memchr" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + +[[package]] +name = "miniz_oxide" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8a240ddb74feaf34a79a7add65a741f3167852fba007066dcac1ca548d89c08" +dependencies = [ + "adler", +] + +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" 
+dependencies = [ + "memchr", + "minimal-lexical", +] + +[[package]] +name = "ntapi" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8a3895c6391c39d7fe7ebc444a87eb2991b2a0bc718fdabd071eec617fc68e4" +dependencies = [ + "winapi", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "nvml-wrapper" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "288bd66a5a56d8c97b178412b328419b3fdec261c0cbc4628ddc49cc16db8fc6" +dependencies = [ + "bitflags 1.3.2", + "libloading 0.7.4", + "nvml-wrapper-sys", + "static_assertions", + "thiserror", + "wrapcenum-derive", +] + +[[package]] +name = "nvml-wrapper-sys" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3d606d4edf766969f16828ec047ca9aa96652a17bd353dc0613bfaca49b61d6" +dependencies = [ + "libloading 0.7.4", +] + +[[package]] +name = "object" +version = "0.32.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" +dependencies = [ + "memchr", +] + +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + +[[package]] +name = "owo-colors" +version = "3.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1b04fb49957986fdce4d6ee7a65027d55d4b6d2265e5848bbb507b58ccfdb6f" + +[[package]] +name = "parking_lot" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" +dependencies = [ + "lock_api", + "parking_lot_core", 
+] + +[[package]] +name = "parking_lot_core" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall 0.5.2", + "smallvec", + "windows-targets", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" + +[[package]] +name = "ppv-lite86" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" + +[[package]] +name = "prettyplease" +version = "0.2.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f12335488a2f3b0a83b14edad48dca9879ce89b2edd10e80237e4e852dd645e" +dependencies = [ + "proc-macro2", + "syn", +] + +[[package]] +name = "probe_cli" +version = "0.2.0" +dependencies = [ + "chrono", + "clap", + "color-eyre", + "env_logger", + "exec", + "flate2", + "libc", + "log", + "probe_frontend", + "rand", + "serde", + "serde_json", + "tar", +] + +[[package]] +name = "probe_frontend" +version = "0.2.0" +dependencies = [ + "bindgen", + "libc", + "log", + "machine-info", + "probe_macros", + "rayon", + "serde", + "serde_json", + "thiserror", +] + +[[package]] +name = "probe_macros" +version = "0.2.0" +dependencies = [ + "parking_lot", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "proc-macro2" +version = "1.0.86" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +dependencies = [ + 
"proc-macro2", +] + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + +[[package]] +name = "rayon" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + +[[package]] +name = "redox_syscall" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" +dependencies = [ + "bitflags 1.3.2", +] + +[[package]] +name = "redox_syscall" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c82cf8cff14456045f55ec4241383baeff27af886adb72ffb2162f99911de0fd" +dependencies = [ + "bitflags 2.6.0", +] + +[[package]] +name = "regex" +version = "1.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f" +dependencies = [ + "aho-corasick", + 
"memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" + +[[package]] +name = "rustc-demangle" +version = "0.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" + +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + +[[package]] +name = "rustix" +version = "0.38.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" +dependencies = [ + "bitflags 2.6.0", + "errno 0.3.9", + "libc", + "linux-raw-sys", + "windows-sys", +] + +[[package]] +name = "ryu" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "serde" +version = "1.0.203" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7253ab4de971e72fb7be983802300c30b5a7f0c2e56fab8abfc6a214307c0094" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.203" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.118" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d947f6b3163d8857ea16c4fa0dd4840d52f3041039a85decd46867eb1abef2e4" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "smallvec" +version = "1.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "syn" +version = "2.0.68" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "901fa70d88b9d6c98022e23b4136f9f3e54e4662c3bc1bd1d84a42a9a0f0c1e9" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "sysinfo" +version = "0.26.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c18a6156d1f27a9592ee18c1a846ca8dd5c258b7179fc193ae87c74ebb666f5" +dependencies = [ + "cfg-if", + "core-foundation-sys", + "libc", + "ntapi", + "once_cell", + "winapi", +] + +[[package]] 
+name = "tar" +version = "0.4.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb797dad5fb5b76fcf519e702f4a589483b5ef06567f160c392832c1f5e44909" +dependencies = [ + "filetime", + "libc", + "xattr", +] + +[[package]] +name = "thiserror" +version = "1.0.61" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.61" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "thread_local" +version = "1.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b9ef9bad013ada3808854ceac7b46812a6465ba368859a37e2100283d2d719c" +dependencies = [ + "cfg-if", + "once_cell", +] + +[[package]] +name = "tracing" +version = "0.1.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" +dependencies = [ + "pin-project-lite", + "tracing-core", +] + +[[package]] +name = "tracing-core" +version = "0.1.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" +dependencies = [ + "once_cell", + "valuable", +] + +[[package]] +name = "tracing-error" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d686ec1c0f384b1277f097b2f279a2ecc11afe8c133c1aabf036a27cb4cd206e" +dependencies = [ + "tracing", + "tracing-subscriber", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b" +dependencies = [ + 
"sharded-slab", + "thread_local", + "tracing-core", +] + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "valuable" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "wasm-bindgen" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" +dependencies = [ + "cfg-if", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" +dependencies = [ + "proc-macro2", + 
"quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" + +[[package]] +name = "which" +version = "4.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" +dependencies = [ + "either", + "home", + "once_cell", + "rustix", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-core" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" +dependencies = [ + 
"windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" + +[[package]] +name = "wrapcenum-derive" +version = "0.4.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "a76ff259533532054cfbaefb115c613203c73707017459206380f03b3b3f266e" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "xattr" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8da84f1a25939b27f6820d92aed108f83ff920fdf11a7b19366c27c4cda81d4f" +dependencies = [ + "libc", + "linux-raw-sys", + "rustix", +] diff --git a/probe_src/probe_frontend/Cargo.toml b/probe_src/probe_frontend/Cargo.toml new file mode 100644 index 00000000..5b25b713 --- /dev/null +++ b/probe_src/probe_frontend/Cargo.toml @@ -0,0 +1,24 @@ +[workspace] +resolver = "2" +members = [ + "cli", + "lib", + "macros", +] + +[workspace.package] +version = "0.2.0" +license = "MIT" +# authors *MUST* be defined in the form "name " for parsing reasons +authors = [ + "Jenna Fligor ", + "Samuel Grayson " +] +publish = false +edition = "2021" + +[workspace.lints.rust] +unsafe_op_in_unsafe_fn = "forbid" + +[workspace.metadata.crane] +name = "probe" diff --git a/probe_src/probe_frontend/LICENSE b/probe_src/probe_frontend/LICENSE new file mode 100644 index 00000000..404acc08 --- /dev/null +++ b/probe_src/probe_frontend/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 Jenna Fligor and Samuel Grayson + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/probe_src/probe_frontend/README.md b/probe_src/probe_frontend/README.md new file mode 100644 index 00000000..4d993678 --- /dev/null +++ b/probe_src/probe_frontend/README.md @@ -0,0 +1,130 @@ + +# PROBE Frontend + +Tools for recording and manipulating libprobe provenance. + +## Terminology + +The documentation in this project assumes the reader understands a couple pieces +of terminology specific to this tool. + +- **Probe record** (or probe recording) +This is an itermediate representation when creating a probe log. (see the section +on serialization formats for more details) + +- **Probe log** +This is a file (`probe_log` by default) that encodes the data from a probe +record in a format that is cross-platform and much easier to use. (see the +section on serialization format for details) + +- **Transcription** +This is the process of converting a probe record to a probe log. + +- **Translation** +This is the process of polypeptide synthesis from mRNA strands generated during +[**transcription**](https://en.wikipedia.org/wiki/Transcription_(biology)). +(joke) + +## Using the CLI to create probe logs + +the simplest invocation of the `probe` cli is + +```bash +probe record +``` + +this will run `` under the benevolent supervision of libprobe, outputting +the probe record to a temporary directory. Upon the process exiting, `probe` it +will transcribe the record directory and write a probe log file named `probe_log` in +the current directory. 
+ +If you run this again you'll notice it throws an error that the output file +already exists, solve this by passing `-o ` to specify a new file to write +the log to, or by passing `-f` to overwrite the previous log. + +The transcription process can take a while after the program exits, if you don't +want to automatically transcribe the record, you can pass the `-n` flag, this +will change the default output path from `probe_log` to `probe_record`, and will +output a probe record directory that can be transcribed to a probe log later +with the `probe transcribe` command, however the probe record format is not +stable, users are strongly encouraged to have `probe record` automatically +transcribe the record directory immediately after the process exits. If you do +seperate the trancription step from recording, then transcription **must** be +done on the same machine with the exact same version of the cli (and other +constraints, see the section on serialization format for more details). + +### Subshells + +`probe record` does **not** pass your command through a shell, any +subshell or environment substitutions will still be performed by your shell +before the arguments are passed to `probe`. But it won't understand flow control +statements like `if` and `for`, shell builtins like `cd`, or shell +aliases/functions. + +If you need these you can either write a shell script and +invoke `probe record` on that, or else run: + +```bash +probe record bash -c '' +``` + +(any flag after the first positional argument is ignored and treated like a +command argument). + +## Serialization formats + +### Probe record directory + +The format of the probe record directory is defined by libprobe and not part of +this tool's spec, however a best-effort explanation is still given. + +- Each probe record directory is composed of a top-level directory containing +one or more PID directories. 
+ +- Each PID directory has a numeric name corresponding to the PID of the process +who's provenance is recorded inside it, and in turn contains one or more exec +epoch directories. + +- Each exec epoch directory has a numeric name corresponding to the exec epoch +of the virtual memory space who's provenance is recorded inside it, and in turn +contains one or more TID directories. + +- Each TID directory has a numeric name corresponding to the TID of the thread +who's provenance is recorded inside it, it contains two subdirectories named +`data` and `ops` + +- The `data` and `ops` directories both contains one or more files of the form +`X.dat` where `X` is a number, the `.dat` files inside the `data` directory are +called "data arenas", while those in the `ops` directory are called "op arenas". + +- Each op arena is a binary file containing an arena header followed by zero or +more op c structs, followed by zero or more null bytes. + +- Each data arena is a binary file containing an arena header followed by zero +or more bytes of arbitrary data, followed by zero or more null bytes. + +**note:** these files contain +[mmap(2)](https://www.man7.org/linux/man-pages/man2/mmap.2.html)-ed c structures +and are not guaranteed to valid if moved to a computer with a different +architecture, kernel version, or c compiler (or if any of those things change on +the same computer), and may not be properly decoded by versions of the cli with +even patch version differences. + +### Probe log directory + +This format **is** part of this tool's spec, and this tool is the source of +truth for its format. + +- The format of the top-level, PID, and exec epoch directories is the same as +for the probe record directory described above, but rather than containing TID +directories, each exec epoch directory contains one or more TID files. + +- Each TID file has a numeric name corresponding to the TID of the thread who's +provenance is recorded inside it. 
It is a [jsonlines](https://jsonlines.org/) +file, where each line is an op (as defined in this library) serialized as json. + +### Probe log file + +This format is simply a probe log directory that's bundled into a tar archive +and compressed with gzip, since its easier to move as a single file and +compresses well. diff --git a/probe_src/probe_frontend/cli/Cargo.toml b/probe_src/probe_frontend/cli/Cargo.toml new file mode 100644 index 00000000..4c1ebdc8 --- /dev/null +++ b/probe_src/probe_frontend/cli/Cargo.toml @@ -0,0 +1,29 @@ +[package] +name = "probe_cli" +version.workspace = true +license.workspace = true +authors.workspace = true +publish.workspace = true +edition.workspace = true + +[[bin]] +name = "probe" +path = "src/main.rs" + +[dependencies] +chrono = "0.4.38" +clap = { version = "4.5.7", features = ["cargo"] } +color-eyre = "0.6.3" +env_logger = "0.11.3" +exec = "0.3.1" +flate2 = "1.0.30" +libc = "0.2.155" +log = "0.4.21" +probe_frontend = { path = "../lib" } +rand = "0.8.5" +serde = "1.0.203" +serde_json = "1.0.118" +tar = "0.4.41" + +[lints] +workspace = true diff --git a/probe_src/probe_frontend/cli/src/dump.rs b/probe_src/probe_frontend/cli/src/dump.rs new file mode 100644 index 00000000..c7d3aaf8 --- /dev/null +++ b/probe_src/probe_frontend/cli/src/dump.rs @@ -0,0 +1,404 @@ +use std::{ + fs::File, + io::{Read, Write}, + path::Path, +}; + +use chrono::{DateTime, SecondsFormat}; +use color_eyre::eyre::{eyre, Result, WrapErr}; +use probe_frontend::ops; +use serde::{Deserialize, Serialize}; + +/// Print the ops from a probe log out for humans. +/// +/// This hides some of the data and so is not suitable for machine consumption use +/// [`to_stdout_json()`] instead. 
+pub fn to_stdout>(tar_path: P) -> Result<()> { + dump_internal(tar_path, |(pid, epoch, tid), ops| { + let mut stdout = std::io::stdout().lock(); + for op in ops { + writeln!(stdout, "{}.{}.{} >>> {}", pid, epoch, tid, op.dump())?; + } + Ok(()) + }) +} + +/// Prints the ops from a probe log out for machine consumption. +/// +/// The ops are emitted one on each line, in the form: +/// +/// ``` +/// { "pid": X, "exec_epoch": Y, "tid": Z, "op": {...} } +/// ``` +/// +/// (without whitespace) +pub fn to_stdout_json>(tar_path: P) -> Result<()> { + dump_internal(tar_path, |(pid, epoch, tid), ops| { + let mut stdout = std::io::stdout().lock(); + + for op in ops { + let json = serde_json::to_string(&DumpOp { + pid, + exec_epoch: epoch, + tid, + op, + })?; + writeln!(stdout, "{}", json)?; + } + Ok(()) + }) +} + +fn dump_internal, F: Fn((usize, usize, usize), Vec) -> Result<()>>( + tar_path: P, + printer: F, +) -> Result<()> { + let file = flate2::read::GzDecoder::new(File::open(&tar_path).wrap_err_with(|| { + eyre!(format!( + "Failed to open input file '{}'", + tar_path.as_ref().to_string_lossy() + )) + })?); + + let mut tar = tar::Archive::new(file); + + tar.entries() + .wrap_err("Unable to get tarball entry iterator")? + .try_for_each(|x| { + let mut entry = x.wrap_err("Unable to extract tarball entry")?; + + let path = entry + .path() + .wrap_err("Error getting path of tarball entry")? + .as_ref() + // this forced UTF-8 conversion is permitted because these paths are strictly + // within the tarball *we wrote*, so the paths should be all ASCII + .to_str() + .ok_or_else(|| eyre!("Tarball entry path not valid UTF-8"))? 
+ .to_owned(); + + // if path == "_metadata" { + // return Ok(()); + // } + + let mut buf = String::new(); + let size = entry + .read_to_string(&mut buf) + .wrap_err("unable to read contents of tarball entry")?; + + // this is the case where the entry is a directory + if size == 0 { + return Ok(()); + } + + let hierarchy = path + .split('/') + .map(|x| { + x.parse::() + .wrap_err(format!("Unable to convert path component '{x}' to integer")) + }) + .collect::, _>>() + .wrap_err("Unable to extract PID.EPOCH.TID hierarchy")?; + + if hierarchy.len() != 3 { + return Err(eyre!("malformed PID.EPOCH.TID hierarchy")); + } + let op_id_triple = (hierarchy[0], hierarchy[1], hierarchy[2]); + + let ops = buf + .split('\n') + .filter_map(|x| { + if x.is_empty() { + return None; + } + Some(serde_json::from_str::(x).wrap_err("Error deserializing Op")) + }) + .collect::, _>>() + .wrap_err("Failed to deserialize TID file")?; + + printer(op_id_triple, ops)?; + + Ok(()) + }) +} + +/// Helper struct constructed from pid/epoch/tid hierarchy information and an op. Used for +/// serialization. 
+#[derive(Debug, Clone, Serialize, Deserialize)] +struct DumpOp { + pid: usize, + exec_epoch: usize, + tid: usize, + op: ops::Op, +} + +// OPTIMIZE: Display won't work (foreign trait rule) but some kind of streaming would greatly +// reduce unnecessary heap allocations and mem-copies; if we don't care about UTF-8 guarantees we +// might be able to do some kind of byte iterator approach and evaluate it all lazily +trait Dump { + fn dump(&self) -> String; +} + +impl Dump for ops::StatxTimestamp { + fn dump(&self) -> String { + match DateTime::from_timestamp(self.sec, self.nsec) { + Some(x) => x.to_rfc3339_opts(SecondsFormat::Secs, true), + None => "[INVALID TIMESTAMP]".to_owned(), + } + } +} + +impl Dump for ops::Timeval { + fn dump(&self) -> String { + match DateTime::from_timestamp(self.sec, self.usec as u32 * 1000) { + Some(x) => x.to_rfc3339_opts(SecondsFormat::Secs, true), + None => "[INVALID TIMESTAMP]".to_owned(), + } + } +} + +impl Dump for ops::Statx { + fn dump(&self) -> String { + format!( + "[ uid={}, gid={}, mode={:#06o} ino={}, size={}, mtime={} ]", + self.uid, + self.gid, + self.mode, + self.ino, + self.size, + self.mtime.dump(), + ) + } +} + +impl Dump for ops::Rusage { + fn dump(&self) -> String { + format!( + "[ utime={}, stime={}, maxrss={} ]", + self.utime.dump(), + self.stime.dump(), + self.maxrss, + ) + } +} + +impl Dump for ops::Path { + fn dump(&self) -> String { + format!( + "[ dirfd={}, path='{}', inode={}, mtime={} ]", + self.dirfd_minus_at_fdcwd + libc::AT_FDCWD, + self.path.to_string_lossy(), + self.inode, + self.mtime.dump(), + ) + } +} + +impl Dump for ops::CloneOp { + fn dump(&self) -> String { + format!( + "[ task_type={}, task_id={}, errno={} ]", + self.task_type, self.task_id, self.ferrno, + ) + } +} + +impl Dump for ops::CloseOp { + fn dump(&self) -> String { + format!( + "[ low_fd={}, high_fd={}, errno={} ]", + self.low_fd, self.high_fd, self.ferrno, + ) + } +} + +impl Dump for ops::ExitOp { + fn dump(&self) -> String { + format!( 
+ "[ satus={}, run_atexit_handlers={} ]", + self.status, self.run_atexit_handlers, + ) + } +} + +impl Dump for ops::GetRUsageOp { + fn dump(&self) -> String { + format!( + "[ waitpid_arg={}, getrusage_arg={}, usage={}, errno={} ]", + self.waitpid_arg, + self.getrusage_arg, + self.usage.dump(), + self.ferrno, + ) + } +} + +impl Dump for ops::InitProcessOp { + fn dump(&self) -> String { + format!("[ pid={} ]", self.pid) + } +} + +impl Dump for ops::InitThreadOp { + fn dump(&self) -> String { + format!("[ tid={} ]", self.tid) + } +} + +impl Dump for ops::WaitOp { + fn dump(&self) -> String { + format!( + "[ task_type={}, task_id={}, options={}, status={}, errno={} ]", + self.task_type, self.task_id, self.options, self.status, self.ferrno, + ) + } +} + +impl Dump for ops::InitExecEpochOp { + fn dump(&self) -> String { + format!( + "[ epoch={}, program_name={} ]", + self.epoch, + self.program_name.to_string_lossy(), + ) + } +} + +impl Dump for ops::OpenOp { + fn dump(&self) -> String { + format!( + "[ path={}, flags={}, mode={:#06o} fd={}, errno={} ]", + self.path.dump(), + self.flags, + self.mode, + self.fd, + self.ferrno, + ) + } +} + +impl Dump for ops::ChdirOp { + fn dump(&self) -> String { + format!("[ path={}, errno={} ]", self.path.dump(), self.ferrno,) + } +} + +impl Dump for ops::ExecOp { + fn dump(&self) -> String { + format!("[ path={}, errno={} ]", self.path.dump(), self.ferrno,) + } +} + +impl Dump for ops::AccessOp { + fn dump(&self) -> String { + format!( + "[ path={}, mode={:#06o}, flags={}, errno={} ]", + self.path.dump(), + self.mode, + self.flags, + self.ferrno, + ) + } +} + +impl Dump for ops::StatOp { + fn dump(&self) -> String { + format!( + "[ path={}, flags={}, statx_buf={}, errno={} ]", + self.path.dump(), + self.flags, + self.statx_buf.dump(), + self.ferrno, + ) + } +} + +impl Dump for ops::ReaddirOp { + fn dump(&self) -> String { + format!( + "[ dir={}, child='{}', all_children={}, errno={} ]", + self.dir.dump(), + 
self.child.to_string_lossy(), + self.all_children, + self.ferrno, + ) + } +} + +impl Dump for ops::Metadata { + fn dump(&self) -> String { + match self { + ops::Metadata::Mode { mode, .. } => format!("Mode[ mode={:#06o} ]", mode), + ops::Metadata::Ownership { uid, gid, .. } => { + format!("Ownership[ uid={}, gid={} ]", uid, gid) + } + ops::Metadata::Times { + is_null, + atime, + mtime, + .. + } => format!( + "Times[ is_null={}, atime={}, mtime={} ]", + is_null, + atime.dump(), + mtime.dump() + ), + } + } +} + +impl Dump for ops::UpdateMetadataOp { + fn dump(&self) -> String { + format!( + "[ path={}, flags={}, metadata={}, errno={} ]", + self.path.dump(), + self.flags, + self.metadata.dump(), + self.ferrno, + ) + } +} + +impl Dump for ops::ReadLinkOp { + fn dump(&self) -> String { + format!( + "[ path={}, resolved='{}', errno={} ]", + self.path.dump(), + self.resolved.to_string_lossy(), + self.ferrno + ) + } +} + +impl Dump for ops::OpInternal { + fn dump(&self) -> String { + fn wfmt(x: &str, y: &impl Dump) -> String { + format!("{}{}", x, y.dump()) + } + + match self { + ops::OpInternal::InitProcessOp(x) => wfmt("InitProcessOp", x), + ops::OpInternal::InitExecEpochOp(x) => wfmt("InitExecEpochOp", x), + ops::OpInternal::InitThreadOp(x) => wfmt("InitThreadOp", x), + ops::OpInternal::OpenOp(x) => wfmt("OpenOp", x), + ops::OpInternal::CloseOp(x) => wfmt("CloseOp", x), + ops::OpInternal::ChdirOp(x) => wfmt("ChdirOp", x), + ops::OpInternal::ExecOp(x) => wfmt("ExecOp", x), + ops::OpInternal::CloneOp(x) => wfmt("CloneOp", x), + ops::OpInternal::ExitOp(x) => wfmt("ExitOp", x), + ops::OpInternal::AccessOp(x) => wfmt("AccessOp", x), + ops::OpInternal::StatOp(x) => wfmt("StatOp", x), + ops::OpInternal::ReaddirOp(x) => wfmt("ReadirOp", x), + ops::OpInternal::WaitOp(x) => wfmt("WaitOp", x), + ops::OpInternal::GetRUsageOp(x) => wfmt("GetRUsageOp", x), + ops::OpInternal::UpdateMetadataOp(x) => wfmt("UpdateMetadataOp", x), + ops::OpInternal::ReadLinkOp(x) => wfmt("ReadLinkOp", x), 
+ } + } +} + +impl Dump for ops::Op { + fn dump(&self) -> String { + self.data.dump() + } +} diff --git a/probe_src/probe_frontend/cli/src/main.rs b/probe_src/probe_frontend/cli/src/main.rs new file mode 100644 index 00000000..1c0b7a5c --- /dev/null +++ b/probe_src/probe_frontend/cli/src/main.rs @@ -0,0 +1,148 @@ +use std::{ffi::OsString, fs::File}; + +use clap::{arg, command, value_parser, Command}; +use color_eyre::eyre::{eyre, Context, Result}; +use flate2::Compression; + +/// Output the ops from a probe log file to stdout. +mod dump; + +/// Run commands under provenance and generate probe record directory. +mod record; + +/// Wrapper over [`probe_frontend::transcribe`]. +mod transcribe; + +/// Utility code for creating temporary directories. +mod util; + +fn main() -> Result<()> { + color_eyre::install()?; + env_logger::Builder::from_env(env_logger::Env::new().filter_or("__PROBE_LOG", "warn")).init(); + log::debug!("Logger initialized"); + + let matches = command!() + .about("Generate or manipulate Provenance for Replay OBservation Engine (PROBE) logs.") + .propagate_version(true) + .subcommands([ + Command::new("record") + .args([ + arg!(-o --output "Set destinaton for recording.") + .required(false) + .value_parser(value_parser!(OsString)), + arg!(-f --overwrite "Overwrite existing output if it exists.") + .required(false) + .value_parser(value_parser!(bool)), + arg!(-n --"no-transcribe" "Emit PROBE record rather than PROBE log.") + .required(false) + .value_parser(value_parser!(bool)), + arg!(--gdb "Run under gdb.") + .required(false) + .value_parser(value_parser!(bool)), + arg!(--debug "Run in verbose & debug build of libprobe.") + .required(false) + .value_parser(value_parser!(bool)), + arg!( ... 
"Command to execute under provenance.") + .required(true) + .trailing_var_arg(true) + .value_parser(value_parser!(OsString)), + ]) + .about("Execute a command and record its provenance"), + Command::new("transcribe") + .args([ + arg!(-f --overwrite "Overwrite existing output if it exists.") + .required(false) + .value_parser(value_parser!(bool)), + arg!(-o --output "Path to write the transcribed PROBE log.") + .required(false) + .default_value("probe_log") + .value_parser(value_parser!(OsString)), + arg!(-i --input "Path to read the PROBE record from.") + .required(false) + .default_value("probe_record") + .value_parser(value_parser!(OsString)), + ]) + .about("Convert PROBE records to PROBE logs."), + Command::new("dump") + .args([ + arg!(--json "Output JSON.") + .required(false) + .value_parser(value_parser!(bool)), + arg!(-i --input "Path to load PROBE log from.") + .required(false) + .default_value("probe_log") + .value_parser(value_parser!(OsString)), + ]) + .about("Write the data from probe log data in a human-readable manner"), + Command::new("__gdb-exec-shim").hide(true).arg( + arg!( ... 
"Command to run") + .required(true) + .trailing_var_arg(true) + .value_parser(value_parser!(OsString)), + ), + ]) + .get_matches(); + + match matches.subcommand() { + Some(("record", sub)) => { + let output = sub.get_one::("output").cloned(); + let overwrite = sub.get_flag("overwrite"); + let no_transcribe = sub.get_flag("no-transcribe"); + let gdb = sub.get_flag("gdb"); + let debug = sub.get_flag("debug"); + let cmd = sub + .get_many::("CMD") + .unwrap() + .cloned() + .collect::>(); + + if no_transcribe { + record::record_no_transcribe(output, overwrite, gdb, debug, cmd) + } else { + record::record_transcribe(output, overwrite, gdb, debug, cmd) + } + .wrap_err("Record command failed") + } + Some(("transcribe", sub)) => { + let overwrite = sub.get_flag("overwrite"); + let output = sub.get_one::("output").unwrap().clone(); + let input = sub.get_one::("input").unwrap().clone(); + + if overwrite { + File::create(&output) + } else { + File::create_new(&output) + } + .wrap_err("Failed to create output file") + .map(|file| { + tar::Builder::new(flate2::write::GzEncoder::new(file, Compression::default())) + }) + .and_then(|mut tar| transcribe::transcribe(input, &mut tar)) + .wrap_err("Transcribe command failed") + } + Some(("dump", sub)) => { + let json = sub.get_flag("json"); + let input = sub.get_one::("input").unwrap().clone(); + + if json { + dump::to_stdout_json(input) + } else { + dump::to_stdout(input) + } + .wrap_err("Dump command failed") + } + Some(("__gdb-exec-shim", sub)) => { + let cmd = sub + .get_many::("CMD") + .unwrap() + .cloned() + .collect::>(); + + let e = exec::Command::new(&cmd[0]).args(&cmd[1..]).exec(); + + Err(e).wrap_err("Shim failed to exec") + } + None => Err(eyre!("Subcommand expected, try --help for more info")), + _ => Err(eyre!("Unknown subcommand")), + } +} diff --git a/probe_src/probe_frontend/cli/src/record.rs b/probe_src/probe_frontend/cli/src/record.rs new file mode 100644 index 00000000..396970ee --- /dev/null +++ 
b/probe_src/probe_frontend/cli/src/record.rs @@ -0,0 +1,241 @@ +use std::{ + ffi::OsString, + fs::{self, File}, + os::unix::process::ExitStatusExt, + path::{Path, PathBuf}, + thread, +}; + +use color_eyre::eyre::{eyre, Result, WrapErr}; +use flate2::Compression; + +use crate::{transcribe, util::Dir}; + +// TODO: modularize and improve ergonomics (maybe expand builder pattern?) + +/// create a probe record directory from command arguments +pub fn record_no_transcribe( + output: Option, + overwrite: bool, + gdb: bool, + debug: bool, + cmd: Vec, +) -> Result<()> { + let output = match output { + Some(x) => fs::canonicalize(x).wrap_err("Failed to canonicalize record directory path")?, + None => { + let mut output = std::env::current_dir().wrap_err("Failed to get CWD")?; + output.push("probe_record"); + output + } + }; + + if overwrite { + if let Err(e) = fs::remove_dir_all(&output) { + match e.kind() { + std::io::ErrorKind::NotFound => (), + _ => return Err(e).wrap_err("Failed to remove exisitng record directory"), + } + } + } + + let record_dir = Dir::new(output).wrap_err("Failed to create record directory")?; + + Recorder::new(cmd, record_dir) + .gdb(gdb) + .debug(debug) + .record()?; + + Ok(()) +} + +/// create a probe log file from command arguments +pub fn record_transcribe( + output: Option, + overwrite: bool, + gdb: bool, + debug: bool, + cmd: Vec, +) -> Result<()> { + let output = match output { + Some(x) => x, + None => OsString::from("probe_log"), + }; + + let file = if overwrite { + File::create(&output) + } else { + File::create_new(&output) + } + .wrap_err("Failed to create output file")?; + + let mut tar = tar::Builder::new(flate2::write::GzEncoder::new(file, Compression::default())); + + let mut record_dir = Recorder::new( + cmd, + Dir::temp(true).wrap_err("Failed to create record directory")?, + ) + .gdb(gdb) + .debug(debug) + .record()?; + + match transcribe::transcribe(&record_dir, &mut tar) { + Ok(_) => (), + Err(e) => { + log::error!( + "Error 
transcribing record directory, saving directory '{}'", + record_dir.as_ref().to_string_lossy() + ); + record_dir.drop = false; + return Err(e).wrap_err("Failed to transcirbe record directory"); + } + }; + + Ok(()) +} + +/// Builder for running processes under provenance. +// TODO: extract this into the library part of this project +#[derive(Debug)] +pub struct Recorder { + gdb: bool, + debug: bool, + + output: Dir, + cmd: Vec, +} + +impl Recorder { + /// runs the built recorder, on success returns the PID of launched process and the TempDir it + /// was recorded into + pub fn record(self) -> Result { + // reading and canonicalizing path to libprobe + let mut libprobe = fs::canonicalize(match std::env::var_os("__PROBE_LIB") { + Some(x) => PathBuf::from(x), + None => return Err(eyre!("couldn't find libprobe, are you using the wrapper?")), + }) + .wrap_err("unable to canonicalize libprobe path")?; + if self.debug || self.gdb { + log::debug!("Using debug version of libprobe"); + libprobe.push("libprobe-dbg.so"); + } else { + libprobe.push("libprobe.so"); + } + + // append any existing LD_PRELOAD overrides; libprobe needs to be explicitly converted from + // a PathBuf to a OsString because PathBuf::push() automatically adds path separators which + // is incorrect here. 
+ let mut ld_preload = OsString::from(libprobe); + if let Some(x) = std::env::var_os("LD_PRELOAD") { + ld_preload.push(":"); + ld_preload.push(&x); + } + + let mut child = if self.gdb { + let mut dir_env = OsString::from("--init-eval-command=set environment __PROBE_DIR="); + dir_env.push(self.output.path()); + let mut preload_env = OsString::from("--init-eval-command=set environment LD_PRELOAD="); + preload_env.push(ld_preload); + + let self_bin = + std::env::current_exe().wrap_err("Failed to get path to current executable")?; + + std::process::Command::new("gdb") + .arg(dir_env) + .arg(preload_env) + .arg("--args") + .arg(self_bin) + .arg("__gdb-exec-shim") + .args(&self.cmd) + .env_remove("__PROBE_LIB") + .env_remove("__PROBE_LOG") + .spawn() + .wrap_err("Failed to launch gdb")? + } else { + std::process::Command::new(&self.cmd[0]) + .args(&self.cmd[1..]) + .env_remove("__PROBE_LIB") + .env_remove("__PROBE_LOG") + .env("__PROBE_DIR", self.output.path()) + .env("LD_PRELOAD", ld_preload) + .spawn() + .wrap_err("Failed to launch child process")? + }; + + if !self.gdb { + // without this the child process typically won't have written it's first op by the + // time we do our sanity check, since we're about to wait on child anyway, this isn't a + // big deal. + thread::sleep(std::time::Duration::from_millis(50)); + + match Path::read_dir(self.output.path()) { + Ok(x) => { + let any_files = x + .into_iter() + .try_fold(false, |_, x| x.map(|x| x.path().exists()))?; + if !any_files { + log::warn!( + "No arena files detected after 50ms, \ + something is wrong, you should probably abort!" + ); + } + } + Err(e) => { + return Err(e).wrap_err( + "Unable to read record directory during post-startup sanity check", + ) + } + } + } + + // OPTIMIZE: consider background serialization of ops as threads/processes exit instead of + // waiting until the end; large increase to complexity but potentially huge gains. 
+ let exit = child.wait().wrap_err("Failed to await child process")?; + if !exit.success() { + match exit.code() { + Some(code) => log::warn!("Recorded process exited with code {code}"), + None => match exit.signal() { + Some(sig) => match crate::util::sig_to_name(sig) { + Some(name) => log::warn!("Recorded process exited with signal {name}"), + None => { + if sig < libc::SIGRTMAX() { + log::warn!("Recorded process exited with realtime signal {sig}"); + } else { + log::warn!("Recorded process exited with unknown signal {sig}"); + } + } + }, + None => log::warn!("Recorded process exited with unknown error"), + }, + } + } + + Ok(self.output) + } + + /// Create new [`Recorder`] from a command and the directory where it should write the probe + /// record. + /// + /// `cmd[0]` will be used as the command while `cmd[1..]` will be used as the arguments. + pub fn new(cmd: Vec, output: Dir) -> Self { + Self { + gdb: false, + debug: false, + + output, + cmd, + } + } + + /// Set if the process should be run under gdb, implies debug. + pub fn gdb(mut self, gdb: bool) -> Self { + self.gdb = gdb; + self + } + + /// Set if the debug version of libprobe should be used. 
+ pub fn debug(mut self, debug: bool) -> Self { + self.debug = debug; + self + } +} diff --git a/probe_src/probe_frontend/cli/src/transcribe.rs b/probe_src/probe_frontend/cli/src/transcribe.rs new file mode 100644 index 00000000..799df9b1 --- /dev/null +++ b/probe_src/probe_frontend/cli/src/transcribe.rs @@ -0,0 +1,21 @@ +use std::{io::Write, path::Path}; + +use color_eyre::eyre::{Result, WrapErr}; + +use crate::util::Dir; + +pub fn transcribe, T: Write>( + record_dir: P, + tar: &mut tar::Builder, +) -> Result<()> { + let log_dir = Dir::temp(true).wrap_err("Failed to create temp directory for transcription")?; + + probe_frontend::transcribe::parse_top_level(record_dir, &log_dir) + .wrap_err("Failed to transcribe record directory")?; + + tar.append_dir_all(".", &log_dir) + .wrap_err("Failed to copy output dir into archive")?; + tar.finish().wrap_err("Failed to finish writing tarball")?; + + Ok(()) +} diff --git a/probe_src/probe_frontend/cli/src/util.rs b/probe_src/probe_frontend/cli/src/util.rs new file mode 100644 index 00000000..3547a38c --- /dev/null +++ b/probe_src/probe_frontend/cli/src/util.rs @@ -0,0 +1,135 @@ +use std::{ + fs, io, + path::{Path, PathBuf}, +}; + +use color_eyre::eyre::{Context, Result}; +use rand::Rng; + +/// Represents a newly created directory and optionally acts as a RAII guard that (attempts to) +/// delete the directory and anything in it when dropped. +#[derive(Debug)] +pub struct Dir { + /// path to created directory + path: PathBuf, + + /// drop flag, if this is `true` when [`Dir`] is dropped then the drop hook will call + /// [`fs::remove_dir_all()`] on `path`, if this fails it will log a warning but take no other + /// action. + pub drop: bool, +} + +impl Dir { + /// Attempts to create a new directory at `path`. + /// + /// By default directories created this way **are not** deleted when [`Dir`] is dropped. 
+ #[inline] + pub fn new(path: PathBuf) -> Result { + fs::create_dir(&path).wrap_err("Failed to create named directory")?; + Ok(Self { path, drop: false }) + } + + /// Attempts to create a new tempoerary directory + /// + /// The directory is created in the path retunred by [`std::env::temp_dir()`] and is named + /// `probe-XXXXXXXX` where `X` is a random alphanumeric digit. Will try again (indefinitely) if + /// directory creation errors with [`AlreadyExists`](io::ErrorKind::AlreadyExists). + /// + /// By default directories created this way **are** deleted when [`Dir`] is dropped. + pub fn temp(drop: bool) -> Result { + fn rand_alphanumeric(len: usize) -> String { + const CHARSET: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ\ + abcdefghijklmnopqrstuvwxyz\ + 0123456789"; + + let mut rng = rand::thread_rng(); + + (0..len) + .map(|_| { + let idx = rng.gen_range(0..CHARSET.len()); + CHARSET[idx] as char + }) + .collect() + } + + let mut path = std::env::temp_dir(); + path.push(format!("probe-{}", rand_alphanumeric(8))); + + match fs::create_dir(&path) { + Ok(_) => Ok(Self { path, drop }), + Err(e) => match e.kind() { + io::ErrorKind::AlreadyExists => Self::temp(drop), + _ => Err(e).wrap_err("Failed to create temp directory"), + }, + } + } + + #[inline] + pub fn path(&self) -> &Path { + self.path.as_path() + } +} + +impl AsRef for Dir { + fn as_ref(&self) -> &Path { + self.path.as_path() + } +} + +impl Drop for Dir { + fn drop(&mut self) { + if self.drop { + if let Err(e) = fs::remove_dir_all(&self.path) { + log::warn!( + "Failed to remove temporary directory '{}' because: {}", + self.path.to_string_lossy(), + e + ); + } + } + } +} + +pub(crate) fn sig_to_name(sig: i32) -> Option<&'static str> { + Some(match sig { + libc::SIGHUP => "SIGHUP", + libc::SIGINT => "SIGINT", + libc::SIGQUIT => "SIGQUIT", + libc::SIGILL => "SIGILL", + libc::SIGTRAP => "SIGTRAP", + libc::SIGABRT => "SIGABRT/SIGIOT", // SIGABRT and SIGIOT have the same code + libc::SIGBUS => "SIGBUS", + libc::SIGFPE 
=> "SIGFPE", + libc::SIGKILL => "SIGKILL", + libc::SIGUSR1 => "SIGUSR1", + libc::SIGSEGV => "SIGSEGV", + libc::SIGUSR2 => "SIGUSR2", + libc::SIGPIPE => "SIGPIPE", + libc::SIGALRM => "SIGALRM", + libc::SIGTERM => "SIGTERM", + libc::SIGSTKFLT => "SIGSTKFLT", + libc::SIGCHLD => "SIGCHLD", + libc::SIGCONT => "SIGCONT", + libc::SIGSTOP => "SIGSTOP", + libc::SIGTSTP => "SIGTSTP", + libc::SIGTTIN => "SIGTTIN", + libc::SIGTTOU => "SIGTTOU", + libc::SIGURG => "SIGURG", + libc::SIGXCPU => "SIGXCPU", + libc::SIGXFSZ => "SIGXFSZ", + libc::SIGVTALRM => "SIGVTALRM", + libc::SIGPROF => "SIGPROF", + libc::SIGWINCH => "SIGWINCH", + libc::SIGIO => "SIGIO/SIGPOLL", // SIGIO and SIGPOLL have the same code + libc::SIGPWR => "SIGPWR", + libc::SIGSYS => "SIGSYS", + + _ => return None, + }) +} + +#[test] +fn sig_eq() { + assert_eq!(libc::SIGABRT, libc::SIGIOT); + assert_eq!(libc::SIGIO, libc::SIGPOLL); +} diff --git a/probe_src/probe_frontend/configure b/probe_src/probe_frontend/configure new file mode 100755 index 00000000..699751ed --- /dev/null +++ b/probe_src/probe_frontend/configure @@ -0,0 +1,7 @@ +#!/bin/sh + +set -e +cd "$(dirname "$(realpath "$0")")" +mkdir -p ./lib/include +cp ../libprobe/include/prov_ops.h ./lib/include/prov_ops.h +git add ./lib/include diff --git a/probe_src/probe_frontend/deny.toml b/probe_src/probe_frontend/deny.toml new file mode 100644 index 00000000..539bf8b0 --- /dev/null +++ b/probe_src/probe_frontend/deny.toml @@ -0,0 +1,209 @@ +# The graph table configures how the dependency graph is constructed and thus +# which crates the checks are performed against +[graph] +# When creating the dependency graph used as the source of truth when checks are +# executed, this field can be used to prune crates from the graph, removing them +# from the view of cargo-deny. 
This is an extremely heavy hammer, as if a crate +# is pruned from the graph, all of its dependencies will also be pruned unless +# they are connected to another crate in the graph that hasn't been pruned, +# so it should be used with care. The identifiers are [Package ID Specifications] +# (https://doc.rust-lang.org/cargo/reference/pkgid-spec.html) +#exclude = [] +# If true, metadata will be collected with `--all-features`. Note that this can't +# be toggled off if true, if you want to conditionally enable `--all-features` it +# is recommended to pass `--all-features` on the cmd line instead +all-features = false +# If true, metadata will be collected with `--no-default-features`. The same +# caveat with `all-features` applies +no-default-features = false +# If set, these feature will be enabled when collecting metadata. If `--features` +# is specified on the cmd line they will take precedence over this option. +#features = [] + +# The output table provides options for how/if diagnostics are outputted +[output] +# When outputting inclusion graphs in diagnostics that include features, this +# option can be used to specify the depth at which feature edges will be added. +# This option is included since the graphs can be quite large and the addition +# of features from the crate(s) to all of the graph roots can be far too verbose. +# This option can be overridden via `--feature-depth` on the cmd line +feature-depth = 1 + +# This section is considered when running `cargo deny check advisories` +# More documentation for the advisories section can be found here: +# https://embarkstudios.github.io/cargo-deny/checks/advisories/cfg.html +[advisories] +# The path where the advisory databases are cloned/fetched into +#db-path = "$CARGO_HOME/advisory-dbs" +# The url(s) of the advisory databases to use +#db-urls = ["https://github.com/rustsec/advisory-db"] +# A list of advisory IDs to ignore. Note that ignored advisories will still +# output a note when they are encountered. 
+ignore = [ + #"RUSTSEC-0000-0000", + #{ id = "RUSTSEC-0000-0000", reason = "you can specify a reason the advisory is ignored" }, + #"a-crate-that-is-yanked@0.1.1", # you can also ignore yanked crate versions if you wish + #{ crate = "a-crate-that-is-yanked@0.1.1", reason = "you can specify why you are ignoring the yanked crate" }, +] +# If this is true, then cargo deny will use the git executable to fetch advisory database. +# If this is false, then it uses a built-in git library. +# Setting this to true can be helpful if you have special authentication requirements that cargo-deny does not support. +# See Git Authentication for more information about setting up git authentication. +#git-fetch-with-cli = true + +# This section is considered when running `cargo deny check licenses` +# More documentation for the licenses section can be found here: +# https://embarkstudios.github.io/cargo-deny/checks/licenses/cfg.html +[licenses] +# List of explicitly allowed licenses +# See https://spdx.org/licenses/ for list of possible licenses +# [possible values: any SPDX 3.11 short identifier (+ optional exception)]. +allow = [ + #OSI approved FOSS licenses (will expand as needed) + "Apache-2.0 WITH LLVM-exception", + "Apache-2.0", + "BSD-3-Clause", + "ISC", + "MIT", + "Unicode-DFS-2016", +] +# The confidence threshold for detecting a license from license text. +# The higher the value, the more closely the license text must be to the +# canonical license text of a valid SPDX license file. +# [possible values: any between 0.0 and 1.0]. 
+confidence-threshold = 0.8 +# Allow 1 or more licenses on a per-crate basis, so that particular licenses +# aren't accepted for every possible crate as with the normal allow list +exceptions = [ + # Each entry is the crate and version constraint, and its specific allow + # list + #{ allow = ["Zlib"], crate = "adler32" }, +] + +# Some crates don't have (easily) machine readable licensing information, +# adding a clarification entry for it allows you to manually specify the +# licensing information +#[[licenses.clarify]] +# The package spec the clarification applies to +#crate = "ring" +# The SPDX expression for the license requirements of the crate +#expression = "MIT AND ISC AND OpenSSL" +# One or more files in the crate's source used as the "source of truth" for +# the license expression. If the contents match, the clarification will be used +# when running the license check, otherwise the clarification will be ignored +# and the crate will be checked normally, which may produce warnings or errors +# depending on the rest of your configuration +#license-files = [ +# Each entry is a crate relative path, and the (opaque) hash of its contents +#{ path = "LICENSE", hash = 0xbd0eed23 } +#] + +[licenses.private] +# If true, ignores workspace crates that aren't published, or are only +# published to private registries. +# To see how to mark a crate as unpublished (to the official registry), +# visit https://doc.rust-lang.org/cargo/reference/manifest.html#the-publish-field. +#ignore = false +# One or more private registries that you might publish crates to, if a crate +# is only published to private registries, and ignore is true, the crate will +# not have its license(s) checked +#registries = [ +# #"https://sekretz.com/registry +#] + +# This section is considered when running `cargo deny check bans`. 
+# More documentation about the 'bans' section can be found here: +# https://embarkstudios.github.io/cargo-deny/checks/bans/cfg.html +[bans] +# Lint level for when multiple versions of the same crate are detected +multiple-versions = "warn" +# Lint level for when a crate version requirement is `*` +wildcards = "allow" +# The graph highlighting used when creating dotgraphs for crates +# with multiple versions +# * lowest-version - The path to the lowest versioned duplicate is highlighted +# * simplest-path - The path to the version with the fewest edges is highlighted +# * all - Both lowest-version and simplest-path are used +highlight = "all" +# The default lint level for `default` features for crates that are members of +# the workspace that is being checked. This can be overridden by allowing/denying +# `default` on a crate-by-crate basis if desired. +workspace-default-features = "allow" +# The default lint level for `default` features for external crates that are not +# members of the workspace. This can be overridden by allowing/denying `default` +# on a crate-by-crate basis if desired. +external-default-features = "allow" +# List of crates that are allowed. Use with care! +allow = [ + #"ansi_term@0.11.0", + #{ crate = "ansi_term@0.11.0", reason = "you can specify a reason it is allowed" }, +] +# List of crates to deny +deny = [ + #"ansi_term@0.11.0", + #{ crate = "ansi_term@0.11.0", reason = "you can specify a reason it is banned" }, + # Wrapper crates can optionally be specified to allow the crate when it + # is a direct dependency of the otherwise banned crate + #{ crate = "ansi_term@0.11.0", wrappers = ["this-crate-directly-depends-on-ansi_term"] }, +] + +# List of features to allow/deny +# Each entry the name of a crate and a version range. If version is +# not specified, all versions will be matched. 
+#[[bans.features]] +#crate = "reqwest" +# Features to not allow +#deny = ["json"] +# Features to allow +#allow = [ +# "rustls", +# "__rustls", +# "__tls", +# "hyper-rustls", +# "rustls", +# "rustls-pemfile", +# "rustls-tls-webpki-roots", +# "tokio-rustls", +# "webpki-roots", +#] +# If true, the allowed features must exactly match the enabled feature set. If +# this is set there is no point setting `deny` +#exact = true + +# Certain crates/versions that will be skipped when doing duplicate detection. +skip = [ + #"ansi_term@0.11.0", + #{ crate = "ansi_term@0.11.0", reason = "you can specify a reason why it can't be updated/removed" }, +] +# Similarly to `skip` allows you to skip certain crates during duplicate +# detection. Unlike skip, it also includes the entire tree of transitive +# dependencies starting at the specified crate, up to a certain depth, which is +# by default infinite. +skip-tree = [ + #"ansi_term@0.11.0", # will be skipped along with _all_ of its direct and transitive dependencies + #{ crate = "ansi_term@0.11.0", depth = 20 }, +] + +# This section is considered when running `cargo deny check sources`. +# More documentation about the 'sources' section can be found here: +# https://embarkstudios.github.io/cargo-deny/checks/sources/cfg.html +[sources] +# Lint level for what to happen when a crate from a crate registry that is not +# in the allow list is encountered +unknown-registry = "warn" +# Lint level for what to happen when a crate from a git repository that is not +# in the allow list is encountered +unknown-git = "warn" +# List of URLs for allowed crate registries. Defaults to the crates.io index +# if not specified. If it is specified but empty, no registries are allowed. 
+allow-registry = ["https://github.com/rust-lang/crates.io-index"] +# List of URLs for allowed Git repositories +allow-git = [] + +[sources.allow-org] +# 1 or more github.com organizations to allow git sources for +github = [] +# 1 or more gitlab.com organizations to allow git sources for +gitlab = [] +# 1 or more bitbucket.org organizations to allow git sources for +bitbucket = [] diff --git a/probe_src/probe_frontend/flake.lock b/probe_src/probe_frontend/flake.lock new file mode 100644 index 00000000..3594030f --- /dev/null +++ b/probe_src/probe_frontend/flake.lock @@ -0,0 +1,120 @@ +{ + "nodes": { + "advisory-db": { + "flake": false, + "locked": { + "lastModified": 1720572893, + "narHash": "sha256-EQfU1yMnebn7LoJNjjsQimyuWwz+2YzazqUZu8aX/r4=", + "owner": "rustsec", + "repo": "advisory-db", + "rev": "97a2dc75838f19a5fd63dc3f8e3f57e0c4c8cfe6", + "type": "github" + }, + "original": { + "owner": "rustsec", + "repo": "advisory-db", + "type": "github" + } + }, + "crane": { + "inputs": { + "nixpkgs": [ + "nixpkgs" + ] + }, + "locked": { + "lastModified": 1720546058, + "narHash": "sha256-iU2yVaPIZm5vMGdlT0+57vdB/aPq/V5oZFBRwYw+HBM=", + "owner": "ipetkov", + "repo": "crane", + "rev": "2d83156f23c43598cf44e152c33a59d3892f8b29", + "type": "github" + }, + "original": { + "owner": "ipetkov", + "repo": "crane", + "type": "github" + } + }, + "flake-utils": { + "inputs": { + "systems": "systems" + }, + "locked": { + "lastModified": 1710146030, + "narHash": "sha256-SZ5L6eA7HJ/nmkzGG7/ISclqe6oZdOZTNoesiInkXPQ=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "b1d9ab70662946ef0850d488da1c9019f3a9752a", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1720594544, + "narHash": "sha256-w6dlBUQYvS65f0Z33TvkcAj7ITr4NFqhF5ywss5T5bU=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "aa9461550594533c29866d42f861b6ff079a7fb6", + "type": "github" + }, + "original": { 
+ "owner": "NixOS", + "ref": "nixpkgs-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "advisory-db": "advisory-db", + "crane": "crane", + "flake-utils": "flake-utils", + "nixpkgs": "nixpkgs", + "rust-overlay": "rust-overlay" + } + }, + "rust-overlay": { + "inputs": { + "nixpkgs": [ + "nixpkgs" + ] + }, + "locked": { + "lastModified": 1720577957, + "narHash": "sha256-RZuzLdB/8FaXaSzEoWLg3au/mtbuH7MGn2LmXUKT62g=", + "owner": "oxalica", + "repo": "rust-overlay", + "rev": "a434177dfcc53bf8f1f348a3c39bfb336d760286", + "type": "github" + }, + "original": { + "owner": "oxalica", + "repo": "rust-overlay", + "type": "github" + } + }, + "systems": { + "locked": { + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" + }, + "original": { + "owner": "nix-systems", + "repo": "default", + "type": "github" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/probe_src/probe_frontend/flake.nix b/probe_src/probe_frontend/flake.nix new file mode 100644 index 00000000..298dd67d --- /dev/null +++ b/probe_src/probe_frontend/flake.nix @@ -0,0 +1,221 @@ +{ + description = "libprobe frontend"; + + inputs = { + nixpkgs.url = "github:NixOS/nixpkgs/nixpkgs-unstable"; + + crane = { + url = "github:ipetkov/crane"; + inputs.nixpkgs.follows = "nixpkgs"; + }; + + flake-utils.url = "github:numtide/flake-utils"; + + advisory-db = { + url = "github:rustsec/advisory-db"; + flake = false; + }; + + rust-overlay = { + url = "github:oxalica/rust-overlay"; + inputs.nixpkgs.follows = "nixpkgs"; + }; + }; + + # TODO: cleanup derivations and make more usable: + # - version of probe cli with bundled libprobe and wrapper script + # - python code as actual module + # (this may require merging this flake with the top-level one) + outputs = { + self, + nixpkgs, + crane, + flake-utils, + advisory-db, + 
rust-overlay, + ... + }: let + systems = { + # "nix system" = "rust target"; + "x86_64-linux" = "x86_64-unknown-linux-musl"; + "i686-linux" = "i686-unknown-linux-musl"; + "aarch64-linux" = "aarch64-unknown-linux-musl"; + "armv7l-linux" = "armv7-unknown-linux-musleabi"; + }; + in + flake-utils.lib.eachSystem (builtins.attrNames systems) (system: let + pkgs = import nixpkgs { + inherit system; + overlays = [(import rust-overlay)]; + }; + + craneLib = (crane.mkLib pkgs).overrideToolchain (p: + p.rust-bin.stable.latest.default.override { + targets = [systems.${system}]; + }); + + src = ./.; + + # Common arguments can be set here to avoid repeating them later + commonArgs = { + inherit src; + strictDeps = true; + + # all the crates in this workspace either use rust-bindgen or depend + # on local crate that does. + nativeBuildInputs = [ + pkgs.rustPlatform.bindgenHook + ]; + + # pygen needs to know where to write the python file + preConfigurePhases = [ + "pygenConfigPhase" + ]; + pygenConfigPhase = '' + mkdir -p ./python + export PYGEN_OUTFILE="$(realpath ./python/probe_py/generated/ops.py)" + ''; + + CARGO_BUILD_TARGET = "${systems.${system}}"; + CARGO_BUILD_RUSTFLAGS = "-C target-feature=+crt-static"; + }; + + # Build *just* the cargo dependencies (of the entire workspace), + # so we can reuse all of that work (e.g. via cachix) when running in CI + # It is *highly* recommended to use something like cargo-hakari to avoid + # cache misses when building individual top-level-crates + cargoArtifacts = craneLib.buildDepsOnly commonArgs; + + individualCrateArgs = + commonArgs + // { + # inherit cargoArtifacts; + inherit (craneLib.crateNameFromCargoToml {inherit src;}) version; + # disable tests since we'll run them all via cargo-nextest + doCheck = false; + }; + + # Build the top-level crates of the workspace as individual derivations. + # This allows consumers to only depend on (and build) only what they need. 
+ # Though it is possible to build the entire workspace as a single derivation, + # so this is left up to you on how to organize things + probe-frontend = craneLib.buildPackage (individualCrateArgs + // { + pname = "probe-frontend"; + cargoExtraArgs = "-p probe_frontend"; + installPhase = '' + cp -r ./python/ $out + cp ./LICENSE $out/LICENSE + ''; + }); + probe-py = let + workspace = (builtins.fromTOML (builtins.readFile ./Cargo.toml)).workspace; + in + pkgs.substituteAllFiles rec { + name = "probe-py-${version}"; + src = probe-frontend; + files = [ + "./pyproject.toml" + "./LICENSE" + "./probe_py/generated/__init__.py" + "./probe_py/generated/ops.py" + "./probe_py/generated/probe.py" + ]; + + authors = builtins.concatStringsSep "" (builtins.map (match: let + name = builtins.elemAt match 0; + email = builtins.elemAt match 1; + in "\n {name = \"${name}\", email = \"${email}\"},") ( + builtins.map + (author-str: builtins.match "(.+) <(.+)>" author-str) + (workspace.package.authors) + )); + version = workspace.package.version; + }; + probe-cli = craneLib.buildPackage (individualCrateArgs + // { + pname = "probe-cli"; + cargoExtraArgs = "-p probe_cli"; + }); + probe-macros = craneLib.buildPackage (individualCrateArgs + // { + pname = "probe-macros"; + cargoExtraArgs = "-p probe_macros"; + }); + in { + checks = { + # Build the crates as part of `nix flake check` for convenience + inherit probe-frontend probe-py probe-cli probe-macros; + + # Run clippy (and deny all warnings) on the workspace source, + # again, reusing the dependency artifacts from above. + # + # Note that this is done as a separate derivation so that + # we can block the CI if there are issues here, but not + # prevent downstream consumers from building our crate by itself. 
+ probe-workspace-clippy = craneLib.cargoClippy (commonArgs + // { + inherit cargoArtifacts; + cargoClippyExtraArgs = "--all-targets -- --deny warnings"; + }); + + probe-workspace-doc = craneLib.cargoDoc (commonArgs + // { + inherit cargoArtifacts; + }); + + # Check formatting + probe-workspace-fmt = craneLib.cargoFmt { + inherit src; + }; + + # Audit dependencies + probe-workspace-audit = craneLib.cargoAudit { + inherit src advisory-db; + }; + + # Audit licenses + probe-workspace-deny = craneLib.cargoDeny { + inherit src; + }; + + # Run tests with cargo-nextest + # this is why `doCheck = false` on the crate derivations, so as to not + # run the tests twice. + probe-workspace-nextest = craneLib.cargoNextest (commonArgs + // { + inherit cargoArtifacts; + partitions = 1; + partitionType = "count"; + }); + + probe-pygen-sanity = pkgs.runCommand "pygen-sanity-check" {} '' + cp ${probe-py}/probe_py/generated/ops.py $out + ${pkgs.python312}/bin/python $out + ''; + }; + + packages = { + inherit probe-cli probe-py probe-frontend probe-macros; + }; + + devShells.default = craneLib.devShell { + # Inherit inputs from checks. 
+ checks = self.checks.${system}; + + shellHook = '' + export __PROBE_LIB="$(realpath ../libprobe/build)" + export PYGEN_OUTFILE="$(realpath ./python/probe_py/generated/ops.py)" + ''; + + packages = [ + pkgs.cargo-audit + pkgs.cargo-expand + pkgs.cargo-flamegraph + pkgs.cargo-watch + pkgs.gdb + pkgs.rust-analyzer + ]; + }; + }); +} diff --git a/probe_src/probe_frontend/lib/Cargo.toml b/probe_src/probe_frontend/lib/Cargo.toml new file mode 100644 index 00000000..90b871e2 --- /dev/null +++ b/probe_src/probe_frontend/lib/Cargo.toml @@ -0,0 +1,30 @@ +[package] +name = "probe_frontend" +version.workspace = true +license.workspace = true +authors.workspace = true +publish.workspace = true +edition.workspace = true + + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[lib] +name = "probe_frontend" +path = "src/lib.rs" + +[dependencies] +libc = "0.2.155" +log = "0.4.21" +machine-info = "1.0.9" +probe_macros = { path = "../macros" } +rayon = "1.10.0" +serde = { version = "1.0.203", features = ["serde_derive"] } +serde_json = "1.0.118" +thiserror = "1.0.61" + +[build-dependencies] +bindgen = "0.69.4" + +[lints] +workspace = true diff --git a/probe_src/probe_frontend/lib/build.rs b/probe_src/probe_frontend/lib/build.rs new file mode 100644 index 00000000..a6a3e228 --- /dev/null +++ b/probe_src/probe_frontend/lib/build.rs @@ -0,0 +1,166 @@ +use std::collections::HashSet; +use std::env; +use std::path::PathBuf; +use std::sync::OnceLock; + +use bindgen::callbacks::ParseCallbacks; + +#[derive(Debug)] +struct LibprobeCallback; + +/// These C-structs get prefixed with "C_" because a rust version of the struct will be +/// either generated or manually implemented. 
+fn should_prefix(name: &str) -> bool { + static LIST: OnceLock> = OnceLock::new(); + LIST.get_or_init(|| { + HashSet::from([ + "Path", + "InitProcessOp", + "InitExecEpochOp", + "InitThreadOp", + "OpenOp", + "CloseOp", + "ChdirOp", + "ExecOp", + "CloneOp", + "ExitOp", + "AccessOp", + "StatOp", + "ReaddirOp", + "WaitOp", + "GetRUsageOp", + "MetadataKind", + "MetadataValue", + "UpdateMetadataOp", + "ReadLinkOp", + "OpCode", + "Op", + "statx", + "rusage", + "statx_timestamp", + "timespec", + "timeval", + ]) + }) + .contains(name) +} + +/// These structs are parts of tagged unions and so the rust versions of the structs can't (yet) be +/// autogenerated and have to be implemented manually +fn no_derive(name: &str) -> bool { + static LIST: OnceLock> = OnceLock::new(); + LIST.get_or_init(|| { + HashSet::from([ + "MetadataKind", + "MetadataValue", + "UpdateMetadataOp", + "OpCode", + "Op", + ]) + }) + .contains(name) +} + +impl ParseCallbacks for LibprobeCallback { + fn item_name(&self, _original_item_name: &str) -> Option { + if should_prefix(_original_item_name) { + Some(format!("C_{}", _original_item_name)) + } else { + None + } + } + + fn add_derives(&self, info: &bindgen::callbacks::DeriveInfo<'_>) -> Vec { + let mut ret = vec![]; + + match info.kind { + bindgen::callbacks::TypeKind::Struct => { + let orig_name = info.name.strip_prefix("C_"); + if orig_name.is_some() && !no_derive(orig_name.unwrap()) { + ret.push("MakeRustOp".to_owned()); + } + } + bindgen::callbacks::TypeKind::Enum => (), + bindgen::callbacks::TypeKind::Union => (), + }; + + ret + } +} + +fn main() { + // Tell cargo to look for shared libraries in the specified directory + // println!("cargo:rustc-link-search=/path/to/lib"); + + // Tell cargo to tell rustc to link the system bzip2 + // shared library. + // println!("cargo:rustc-link-lib=bz2"); + + // The bindgen::Builder is the main entry point + // to bindgen, and lets you build up options for + // the resulting bindings. 
+ let bindings = bindgen::Builder::default() + .header_contents( + "wrapper", + " + #define _GNU_SOURCE + #include + #include + #include + #include + #include + #include + #include + #include + + // HACK: defining this manually instead of using is + // a huge hack, but it greatly reduces the generated code complexity + // since in glibc all the long ints are unions over two types that + // both alias to long int, this is done for kernel-userland + // compatibility reasons that don't matter here. + struct rusage { + struct timeval ru_utime; + struct timeval ru_stime; + long int ru_maxrss; + long int ru_ixrss; + long int ru_idrss; + long int ru_isrss; + long int ru_minflt; + long int ru_majflt; + long int ru_nswap; + long int ru_inblock; + long int ru_oublock; + long int ru_msgsnd; + long int ru_msgrcv; + long int ru_nsignals; + long int ru_nvcsw; + long int ru_nivcsw; + }; + + #define BORROWED + #define OWNED + ", + ) + // The input header we would like to generate + // bindings for. + .header("./include/prov_ops.h") + // .header_contents("sizeof", " + // const size_t OP_SIZE = sizeof(struct Op); + // ") + // only parse the Op type (and any types contained within, recursively) + .allowlist_item("^(Op)$") + // Tell cargo to invalidate the built crate whenever any of the + // included header files changed. + .parse_callbacks(Box::new(bindgen::CargoCallbacks::new())) + .parse_callbacks(Box::new(LibprobeCallback {})) + // Finish the builder and generate the bindings. + .generate() + // Unwrap the Result and panic on failure. + .expect("Unable to generate bindings"); + + // Write the bindings to the $OUT_DIR/bindings.rs file. 
+ let out_path = PathBuf::from(env::var("OUT_DIR").unwrap()); + bindings + .write_to_file(out_path.join("bindings.rs")) + .expect("Couldn't write bindings!"); +} diff --git a/probe_src/probe_frontend/lib/src/error.rs b/probe_src/probe_frontend/lib/src/error.rs new file mode 100644 index 00000000..8473f011 --- /dev/null +++ b/probe_src/probe_frontend/lib/src/error.rs @@ -0,0 +1,145 @@ +use std::num::ParseIntError; + +pub type Result = std::result::Result; + +#[non_exhaustive] +#[derive(Debug, thiserror::Error)] +pub enum ProbeError { + /// wrapper explaining where an occurred converting a [`C_` struct](crate::ops) to its rust + /// version, call [`root_cause()`](Self::root_cause()) to return the underlying error. + #[error("{msg}: {inner}")] + FFiConversionError { + msg: &'static str, + inner: Box, + }, + + /// The tag of a tagged union type from an [`C_` struct](crate::ops) isn't a valid variant of + /// that union + #[error("Invalid variant of tagged union")] + InvalidVariant(u32), + + /// A pointer from an [`C_` struct](crate::ops) couldn't be decoded into a byte slice. + #[error("Unable to decode pointer {0:#x}")] + InvalidPointer(usize), + + /// Unable to generate a [`CString`](std::ffi::CString) from a byte slice because it had no null byte. + #[error("Expected null byte but none found")] + MissingNull, + + /// Used instead of [`unreachable`] so that functions up the call stack can add + /// [context](Self::Context). + #[error("Reached code believed unreachable, please report this bug")] + UnreachableCode, + + /// An error occurred serializing or deserializing a struct into/from json. + #[error("(de)serialization error ({context}):\n{error}")] + JsonError { + context: &'static str, + error: serde_json::Error, + }, + + /// A generic wrapper around another [`ProbeError`] type that adds additional context, call + /// [`root_cause()`](Self::root_cause()) to return the underlying error. 
+ #[error("{context}:\n{error}")] + Context { + context: &'static str, + error: Box, + }, + + /// A wrapper over a [`std::io::Error`] with a description of what the was being done when an + /// IO error occurred + #[error("{context}:\n{error}")] + ContextIO { + context: &'static str, + error: std::io::Error, + }, + + /// An external function returned [`None`] when [`Some`] was required, contains explanation. + // FIXME: this is an unhelpful error + #[error("{context}:\nNeeded Option was None")] + MissingOption { context: &'static str }, + + /// A wrapper over [`ArenaError`](crate::transcribe::ArenaError), see that type for variant + /// details. + #[error("{0}")] + ArenaError(crate::transcribe::ArenaError), + + /// An error occured trying to parse a string into an integer, this error is generally wrapped + /// in [context](Self::Context). + #[error("{0}")] + ParseIntError(ParseIntError), +} + +impl ProbeError { + /// Walks down the inner value(s) of one or more layers of [`Context`](Self::Context) or + /// [`FfiConversionError`](Self::FFiConversionError) and returns a reference to the underlying + /// error type, returns `&self` for other variants. + pub fn root_cause(&self) -> &ProbeError { + match self { + Self::Context { error, .. } => error.as_ref().root_cause(), + Self::FFiConversionError { inner, .. 
} => inner.as_ref().root_cause(), + _ => self, + } + } +} + +impl From for ProbeError { + fn from(value: crate::transcribe::ArenaError) -> Self { + Self::ArenaError(value) + } +} + +impl From for ProbeError { + fn from(value: ParseIntError) -> Self { + Self::ParseIntError(value) + } +} + +/// create new [`ProbeError::MissingOption`] with the given context +pub(crate) fn option_err(context: &'static str) -> ProbeError { + ProbeError::MissingOption { context } +} + +pub(crate) trait WrapErr { + fn wrap_err(self, context: &'static str) -> Result; +} + +impl WrapErr for std::result::Result { + fn wrap_err(self, context: &'static str) -> Result { + match self { + Ok(x) => Ok(x), + Err(e) => Err(e.convert(context)), + } + } +} + +pub(crate) trait ConvertErr { + fn convert(self, context: &'static str) -> ProbeError; +} + +impl ConvertErr for std::io::Error { + fn convert(self, context: &'static str) -> ProbeError { + ProbeError::ContextIO { + context, + error: self, + } + } +} + +impl ConvertErr for ProbeError { + fn convert(self, context: &'static str) -> ProbeError { + ProbeError::Context { + context, + error: Box::new(self), + } + } +} + +impl ConvertErr for serde_json::Error { + fn convert(self, context: &'static str) -> ProbeError { + ProbeError::JsonError { + context, + error: self, + } + } +} diff --git a/probe_src/probe_frontend/lib/src/lib.rs b/probe_src/probe_frontend/lib/src/lib.rs new file mode 100644 index 00000000..1ea39fa4 --- /dev/null +++ b/probe_src/probe_frontend/lib/src/lib.rs @@ -0,0 +1,30 @@ +/// transcribe probe record directories created by libprobe to log directories + +/// Op definitions from `prov_ops.h` +/// +/// This module contains ffi bindings for the raw C-structs emitted by libprobe, generated automatically with +/// rust-bindgen (these start with `C_`), as well as the converted version which can be serialized +/// +/// While simple Ops containing only Integral values can be used/serialized directory from +/// libprobe, more complicated 
structs containing pointers (usually in the form of strings) need to +/// be manually converted to versions so they can be serialized. This module re-exports the trivial +/// structs and defines new ones (as well as methods for converting) for the non-trivial structs. +/// +pub mod ops; + +/// Convert part of all of a probe record directory to a probe log directory. +/// +/// # Serialization format +/// +/// The serialization format output is very similar to the raw libprobe arena format. It's a +/// filesystem hierarchy of `//` but instead of `` being a directory containing +/// `ops` and `data` directories with the raw C-struct arenas, `` is a +/// [jsonlines](https://jsonlines.org/) file, where each line is a json representation of an +/// [`ops::Op`]. +pub mod transcribe; + +// currently unused, get system metadata +// mod metadata; + +/// Library error type and definitions. +pub mod error; diff --git a/probe_src/probe_frontend/lib/src/metadata.rs b/probe_src/probe_frontend/lib/src/metadata.rs new file mode 100644 index 00000000..9a9d6617 --- /dev/null +++ b/probe_src/probe_frontend/lib/src/metadata.rs @@ -0,0 +1,19 @@ +use machine_info::{Machine, SystemInfo}; +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Serialize, Deserialize)] +pub struct SystemMetadata { + entry_pid: libc::pid_t, + arch: &'static str, + system: SystemInfo, +} + +impl SystemMetadata { + pub fn new(pid: libc::pid_t) -> Self { + Self { + entry_pid: pid, + arch: std::env::consts::ARCH, + system: Machine::new().system_info(), + } + } +} diff --git a/probe_src/probe_frontend/lib/src/ops.rs b/probe_src/probe_frontend/lib/src/ops.rs new file mode 100644 index 00000000..a49c0c2c --- /dev/null +++ b/probe_src/probe_frontend/lib/src/ops.rs @@ -0,0 +1,355 @@ +#![allow(non_upper_case_globals)] +#![allow(non_camel_case_types)] +#![allow(non_snake_case)] + +use crate::error::{ProbeError, Result}; +use crate::transcribe::ArenaContext; +use probe_macros::{MakeRustOp, PygenDataclass}; +use 
serde::{Deserialize, Serialize};
use std::ffi::CString;

/// Specialized version of [`std::convert::From`] for working with libprobe arena structs.
///
/// `C_*` structs from arena allocator files have intrinsically invalid pointers (because they
/// came from a different virtual memory space). This trait and its sibling [`FfiInto`] exist to
/// act as [`From`] and [`Into`] with an added [`ArenaContext`] parameter that can be used to
/// decode pointers.
///
/// The autogenerated rust versions of `C_*` structs implement this trait by recursively calling
/// it on each of their fields. In order to make this work there are three base case
/// implementations:
///
/// - `*mut i8` and `*const i8` can (try to) be converted to [`CString`]s by looking up the
///   pointers in the [`ArenaContext`],
/// - any type implementing [`Copy`], this base case just returns itself.
pub trait FfiFrom<T> {
    fn ffi_from(value: &T, ctx: &ArenaContext) -> Result<Self>
    where
        Self: Sized;
}

impl<T: Copy> FfiFrom<T> for T {
    #[inline]
    fn ffi_from(value: &T, _: &ArenaContext) -> Result<T> {
        Ok(*value)
    }
}

impl FfiFrom<*const i8> for CString {
    #[inline]
    fn ffi_from(value: &*const i8, ctx: &ArenaContext) -> Result<Self> {
        try_cstring(*value, ctx)
    }
}

impl FfiFrom<*mut i8> for CString {
    #[inline]
    fn ffi_from(value: &*mut i8, ctx: &ArenaContext) -> Result<Self> {
        try_cstring(*value, ctx)
    }
}

/// Specialized version of [`std::convert::Into`] for working with libprobe arena structs.
///
/// Much like [`std::convert::Into`] this trait is implemented automatically with a blanket
/// implementation as the reciprocal of [`FfiFrom`].
///
/// See [`FfiFrom`] for an explanation of how this is used in the conversion of `C_` structs.
pub trait FfiInto<T> {
    fn ffi_into(&self, ctx: &ArenaContext) -> Result<T>;
}

impl<T, U> FfiInto<U> for T
where
    U: FfiFrom<T>,
{
    #[inline]
    fn ffi_into(&self, ctx: &ArenaContext) -> Result<U> {
        U::ffi_from(self, ctx)
    }
}

/// Decode an arena string pointer: a null pointer becomes the empty string, any other pointer is
/// resolved through `ctx` and read up to the first nul byte.
fn try_cstring(str: *const i8, ctx: &ArenaContext) -> Result<CString> {
    if str.is_null() {
        std::ffi::CString::new("").map_err(|_| ProbeError::MissingNull)
    } else {
        match ctx.try_get_slice(str as usize) {
            Some(x) => Ok(std::ffi::CStr::from_bytes_until_nul(x)
                .map_err(|_| ProbeError::MissingNull)?
                .to_owned()),
            None => Err(ProbeError::InvalidPointer(str as usize)),
        }
    }
}

// Bindings are generated by `../build.sh` and the MakeRustOp proc-macro
include!(concat!(env!("OUT_DIR"), "/bindings.rs"));

// NOTE: the raw versions of these Ops are tagged unions, so currently they have to be manually
// implemented; this is somewhat confusing since they extensively use types and trait
// implementations that are auto-generated.
+ +#[derive(Debug, Clone, Serialize, Deserialize, PygenDataclass)] +pub enum Metadata { + #[serde(untagged)] + Mode { + mode: mode_t, + + #[serde(serialize_with = "Metadata::serialize_variant_mode")] + #[serde(skip_deserializing)] + _type: (), + }, + #[serde(untagged)] + Ownership { + uid: uid_t, + gid: gid_t, + + #[serde(serialize_with = "Metadata::serialize_variant_ownership")] + #[serde(skip_deserializing)] + _type: (), + }, + #[serde(untagged)] + Times { + is_null: bool, + atime: Timeval, + mtime: Timeval, + + #[serde(serialize_with = "Metadata::serialize_variant_times")] + #[serde(skip_deserializing)] + _type: (), + }, +} + +impl Metadata { + fn serialize_variant_mode( + _: &(), + serializer: S, + ) -> std::result::Result { + serializer.serialize_str("Mode") + } + fn serialize_variant_ownership( + _: &(), + serializer: S, + ) -> std::result::Result { + serializer.serialize_str("Ownership") + } + fn serialize_variant_times( + _: &(), + serializer: S, + ) -> std::result::Result { + serializer.serialize_str("Times") + } +} + +impl FfiFrom for Metadata { + fn ffi_from(value: &C_UpdateMetadataOp, ctx: &ArenaContext) -> Result { + let kind = value.kind; + let value = value.value; + + log::debug!("[unsafe] decoding Metadata tagged union"); + Ok(match kind { + C_MetadataKind_MetadataMode => Metadata::Mode { + mode: unsafe { value.mode }, + + _type: (), + }, + C_MetadataKind_MetadataOwnership => Metadata::Ownership { + uid: unsafe { value.ownership }.uid, + gid: unsafe { value.ownership }.gid, + + _type: (), + }, + C_MetadataKind_MetadataTimes => Metadata::Times { + is_null: unsafe { value.times }.is_null, + atime: unsafe { value.times }.atime.ffi_into(ctx)?, + mtime: unsafe { value.times }.mtime.ffi_into(ctx)?, + + _type: (), + }, + _ => return Err(ProbeError::InvalidVariant(kind)), + }) + } +} + +#[derive(Debug, Clone, Serialize, Deserialize, PygenDataclass)] +pub struct UpdateMetadataOp { + pub path: Path, + pub flags: ::std::os::raw::c_int, + pub metadata: 
Metadata, + pub ferrno: ::std::os::raw::c_int, + + #[serde(serialize_with = "UpdateMetadataOp::serialize_type")] + #[serde(skip_deserializing)] + pub _type: (), +} + +impl UpdateMetadataOp { + fn serialize_type( + _: &(), + serializer: S, + ) -> std::result::Result { + serializer.serialize_str("UpdateMetadataOp") + } +} + +impl FfiFrom for UpdateMetadataOp { + fn ffi_from(value: &C_UpdateMetadataOp, ctx: &ArenaContext) -> Result { + Ok(Self { + path: value.path.ffi_into(ctx)?, + flags: value.flags, + metadata: value + .ffi_into(ctx) + .map_err(|e| ProbeError::FFiConversionError { + msg: "Unable to decode Metadata", + inner: Box::new(e), + })?, + ferrno: value.ferrno, + + _type: (), + }) + } +} + +#[derive(Debug, Clone, Serialize, Deserialize, PygenDataclass)] +pub enum OpInternal { + #[serde(untagged)] + InitProcessOp(InitProcessOp), + #[serde(untagged)] + InitExecEpochOp(InitExecEpochOp), + #[serde(untagged)] + InitThreadOp(InitThreadOp), + #[serde(untagged)] + OpenOp(OpenOp), + #[serde(untagged)] + CloseOp(CloseOp), + #[serde(untagged)] + ChdirOp(ChdirOp), + #[serde(untagged)] + ExecOp(ExecOp), + #[serde(untagged)] + CloneOp(CloneOp), + #[serde(untagged)] + ExitOp(ExitOp), + #[serde(untagged)] + AccessOp(AccessOp), + #[serde(untagged)] + StatOp(StatOp), + #[serde(untagged)] + ReaddirOp(ReaddirOp), + #[serde(untagged)] + WaitOp(WaitOp), + #[serde(untagged)] + GetRUsageOp(GetRUsageOp), + #[serde(untagged)] + UpdateMetadataOp(UpdateMetadataOp), + #[serde(untagged)] + ReadLinkOp(ReadLinkOp), +} + +impl FfiFrom for OpInternal { + fn ffi_from(value: &C_Op, ctx: &ArenaContext) -> Result { + let kind = value.op_code; + let value = value.data; + + log::debug!("[unsafe] decoding Op tagged union [ OpCode={} ]", kind); + Ok(match kind { + C_OpCode_init_process_op_code => { + Self::InitProcessOp(unsafe { value.init_process_epoch }.ffi_into(ctx)?) + } + C_OpCode_init_exec_epoch_op_code => { + Self::InitExecEpochOp(unsafe { value.init_exec_epoch }.ffi_into(ctx)?) 
+ } + C_OpCode_init_thread_op_code => { + Self::InitThreadOp(unsafe { value.init_thread }.ffi_into(ctx)?) + } + C_OpCode_open_op_code => Self::OpenOp(unsafe { value.open }.ffi_into(ctx)?), + C_OpCode_close_op_code => Self::CloseOp(unsafe { value.close }.ffi_into(ctx)?), + C_OpCode_chdir_op_code => Self::ChdirOp(unsafe { value.chdir }.ffi_into(ctx)?), + C_OpCode_exec_op_code => Self::ExecOp(unsafe { value.exec }.ffi_into(ctx)?), + C_OpCode_clone_op_code => Self::CloneOp(unsafe { value.clone }.ffi_into(ctx)?), + C_OpCode_exit_op_code => Self::ExitOp(unsafe { value.exit }.ffi_into(ctx)?), + C_OpCode_access_op_code => Self::AccessOp(unsafe { value.access }.ffi_into(ctx)?), + C_OpCode_stat_op_code => Self::StatOp(unsafe { value.stat }.ffi_into(ctx)?), + C_OpCode_readdir_op_code => Self::ReaddirOp(unsafe { value.readdir }.ffi_into(ctx)?), + C_OpCode_wait_op_code => Self::WaitOp(unsafe { value.wait }.ffi_into(ctx)?), + C_OpCode_getrusage_op_code => { + Self::GetRUsageOp(unsafe { value.getrusage }.ffi_into(ctx)?) + } + C_OpCode_update_metadata_op_code => { + Self::UpdateMetadataOp(unsafe { value.update_metadata }.ffi_into(ctx)?) + } + C_OpCode_read_link_op_code => { + Self::ReadLinkOp(unsafe { value.read_link }.ffi_into(ctx)?) 
+ } + _ => return Err(ProbeError::InvalidVariant(kind)), + }) + } +} + +#[derive(Debug, Clone, Serialize, Deserialize, PygenDataclass)] +pub struct Op { + pub data: OpInternal, + pub time: Timespec, + pub pthread_id: pthread_t, + pub iso_c_thread_id: thrd_t, + + #[serde(serialize_with = "Op::serialize_type")] + #[serde(skip_deserializing)] + pub _type: (), +} + +impl Op { + fn serialize_type( + _: &(), + serializer: S, + ) -> std::result::Result { + serializer.serialize_str("Op") + } +} + +impl FfiFrom for Op { + fn ffi_from(value: &C_Op, ctx: &ArenaContext) -> Result { + Ok(Self { + data: value.ffi_into(ctx)?, + time: value.time.ffi_into(ctx)?, + pthread_id: value.pthread_id, + iso_c_thread_id: value.iso_c_thread_id, + + _type: (), + }) + } +} + +probe_macros::pygen_add_preamble!( + "# https://github.com/torvalds/linux/blob/\ + 73e931504f8e0d42978bfcda37b323dbbd1afc08/include/uapi/linux/fcntl.h#L98", + "AT_FDCWD: int = -100" +); + +probe_macros::pygen_add_prop!(Path impl dirfd -> int: + "return self.dirfd_minus_at_fdcwd + AT_FDCWD" +); + +// WARNING: this macro invocation must come after all other pygen calls for those calls to be +// included in the written file +probe_macros::pygen_write_to_env!("PYGEN_OUTFILE"); + +#[cfg(test)] +mod tests { + // use super::*; + + // we define this constant in the generated python code, so we should make sure we get it + // right. + #[test] + fn at_fdcwd_sanity_check() { + assert_eq!(libc::AT_FDCWD, -100); + } + + // since we're defining a custom version of the rusage struct (indirectly through rust-bindgen) + // we should at least check that they're the same size. 
+ // FIXME: muslc has a different sized rusage struct so libc::rusage doesn't match + // #[test] + // fn rusage_size() { + // assert_eq!( + // std::mem::size_of::(), + // std::mem::size_of::() + // ); + // } +} diff --git a/probe_src/probe_frontend/lib/src/transcribe.rs b/probe_src/probe_frontend/lib/src/transcribe.rs new file mode 100644 index 00000000..a87d6c89 --- /dev/null +++ b/probe_src/probe_frontend/lib/src/transcribe.rs @@ -0,0 +1,431 @@ +use rayon::iter::{ParallelBridge, ParallelIterator}; +use std::{ + collections::HashMap, + ffi::{OsStr, OsString}, + fs::{self, DirEntry, File}, + io::Write, + mem::size_of, + path::{Path, PathBuf}, + time::SystemTime, +}; + +use crate::{ + error::{option_err, ConvertErr, ProbeError, Result, WrapErr}, + ops::{self, C_Op, FfiFrom}, +}; + +/// Recursively parse a whole probe record directory and write it to a probe log directory. +/// +/// This function calls [`parse_pid()`] on each sub-directory in `in_dir` **in parallel**. +/// +/// on success, returns the number of Ops processed in the top-level directory +// OPTIMIZE: consider improved parallelism heuristic. +pub fn parse_top_level, P2: AsRef + Sync>( + in_dir: P1, + out_dir: P2, +) -> Result { + log::info!( + "Processing arena dir {} into output dir {}", + in_dir.as_ref().to_string_lossy(), + out_dir.as_ref().to_string_lossy() + ); + + let start = SystemTime::now(); + + let count = fs::read_dir(in_dir) + .wrap_err("Error opening record directory")? + .par_bridge() + .map(|x| { + parse_pid( + x.wrap_err("Error reading DirEntry from record directory")? 
+ .path(), + &out_dir, + ) + }) + .try_fold(|| 0usize, |acc, x| x.map(|x| acc + x)) + .try_reduce(|| 0usize, |id, x| Ok(id + x))?; + + match SystemTime::now().duration_since(start) { + Ok(x) => log::info!("Processed {} Ops in {:.3} seconds", count, x.as_secs_f32()), + Err(_) => log::error!("Processing arena dir took negative time"), + }; + + Ok(count) +} + +/// Recursively parse a probe record PID directory and write it as a probe log PID directory. +/// +/// This function calls [`parse_exec_epoch()`] on each sub-directory in `in_dir`. +/// +/// On success, returns the number of Ops processed in the PID directory. +pub fn parse_pid, P2: AsRef>(in_dir: P1, out_dir: P2) -> Result { + let pid = filename_numeric(&in_dir)?; + + let dir = { + let mut path = out_dir.as_ref().to_owned(); + path.push(pid.to_string()); + path + }; + + fs::create_dir(&dir).wrap_err("Failed to create ExecEpoch output directory")?; + + fs::read_dir(in_dir) + .wrap_err("Error opening PID directory")? + // .par_bridge() + .map(|entry| { + parse_exec_epoch( + entry + .wrap_err("Error reading DirEntry from PID directory")? + .path(), + &dir, + ) + }) + .try_fold(0usize, |acc, x| x.map(|x| acc + x)) +} + +/// Recursively parse a probe record exec epoch directory and write it as a probe log exec epoch +/// directory. +/// +/// This function calls [`parse_tid()`] on each sub-directory in `in_dir`. +/// +/// On success, returns the number of Ops processed in the ExecEpoch directory. +pub fn parse_exec_epoch, P2: AsRef>( + in_dir: P1, + out_dir: P2, +) -> Result { + let epoch = filename_numeric(&in_dir)?; + + let dir = { + let mut path = out_dir.as_ref().to_owned(); + path.push(epoch.to_string()); + path + }; + + fs::create_dir(&dir).wrap_err("Failed to create ExecEpoch output directory")?; + + fs::read_dir(in_dir) + .wrap_err("Error opening ExecEpoch directory")? + // .par_bridge() + .map(|entry| { + parse_tid( + entry + .wrap_err("Error reading DirEntry from ExecEpoch directory")? 
+ .path(), + &dir, + ) + }) + .try_fold(0usize, |acc, x| x.map(|x| acc + x)) +} + +/// Recursively parse a probe record TID directory and write it as a probe log TID directory. +/// +/// This function parses a TID directory in 6 steps: +/// +/// 1. Output file is created. +/// 2. Paths of sub-directory are parsed into a [`HashMap`]. +/// 3. `data` directory is is read and parsed into [`DataArena`]s which are then parsed into an +/// [`ArenaContext`]. +/// 4. `ops` directory is read and parsed into [`OpsArena`]s. +/// 5. [`OpsArena`]s are parsed into which are then parsed into [`ops::Op`]s using the +/// [`ArenaContext`]. +/// 6. [`ops::Op`]s are serialized into json and written line-by-line into the output directory. +/// +/// (steps 5 & 6 are done lazily with iterators to reduce unnecessary memory allocations) +/// +/// On success, returns the number of Ops processed in the TID directory. +pub fn parse_tid, P2: AsRef>(in_dir: P1, out_dir: P2) -> Result { + fn try_files_from_dir>(dir: P) -> Result> { + match fs::read_dir(&dir) { + Ok(entry_iter) => entry_iter + .map(|entry_result| { + entry_result + .map(|entry| entry.path()) + .wrap_err("Error reading DirEntry from record TID subdirectory") + }) + .collect::>>(), + Err(e) => Err(e.convert("Error opening record TID directory")), + } + } + + // STEP 1 + let tid = filename_numeric(&in_dir)?; + let mut outfile = { + let mut path = out_dir.as_ref().to_owned(); + path.push(tid.to_string()); + File::create_new(path).wrap_err("Failed to create TID output file")? + }; + + // STEP 2 + let paths = fs::read_dir(&in_dir) + .wrap_err("Error reading record TID directory")? 
+ .filter_map(|entry_result| match entry_result { + Ok(entry) => Some((entry.file_name(), entry)), + Err(e) => { + log::warn!("Error reading DirEntry in TID directory: {}", e); + None + } + }) + .collect::>(); + + // STEP 3 + let ctx = ArenaContext( + try_files_from_dir( + paths + .get(OsStr::new("data")) + .ok_or_else(|| option_err("Missing data directory from TID directory"))? + .path(), + )? + .into_iter() + .map(|data_dat_file| { + DataArena::from_bytes( + std::fs::read(&data_dat_file) + .wrap_err("Failed to read file from data directory")?, + filename_numeric(&data_dat_file)?, + ) + }) + .collect::>>()?, + ); + + // STEP 4 + let mut count: usize = 0; + try_files_from_dir( + paths + .get(OsStr::new("ops")) + .ok_or_else(|| option_err("Missing ops directory from TID directory"))? + .path(), + )? + // STEP 5 + .into_iter() + .map(|ops_dat_file| { + std::fs::read(&ops_dat_file) + .wrap_err("Failed to read file from ops directory") + .and_then(|file_contents| { + OpsArena::from_bytes(file_contents, filename_numeric(&ops_dat_file)?) + .wrap_err("Error constructing OpsArena")? + .decode(&ctx) + .wrap_err("Error decoding OpsArena") + }) + }) + // STEP 6 + .try_for_each(|arena_file_ops| { + for op in arena_file_ops? { + outfile + .write_all( + serde_json::to_string(&op) + .wrap_err("Unable to serialize Op")? + .as_bytes(), + ) + .wrap_err("Failed to write serialized Op to tempfile")?; + outfile + .write_all("\n".as_bytes()) + .wrap_err("Failed to write newline deliminator to tempfile")?; + count += 1; + } + + Ok::<(), ProbeError>(()) + })?; + + Ok(count) +} + +/// Gets the [`file stem`](Path::file_stem()) from a path and returns it parsed as an integer. +/// +/// Errors if the path has no file stem (see [`Path::file_stem()`] for details), the file stem +/// isn't valid UTF-8, or the filename can't be parsed as an integer. 
// TODO: cleanup errors, better context
fn filename_numeric<P: AsRef<Path>>(dir: P) -> Result<usize> {
    let file_stem = dir.as_ref().file_stem().ok_or_else(|| {
        log::error!("'{}' has no file stem", dir.as_ref().to_string_lossy());
        option_err("path has no file stem")
    })?;

    file_stem
        .to_str()
        .ok_or_else(|| {
            log::error!("'{}' not valid UTF-8", file_stem.to_string_lossy());
            option_err("filename not valid UTF-8")
        })?
        .parse::<usize>()
        .map_err(|e| {
            log::error!(
                "Parsing filename '{}' to integer",
                file_stem.to_string_lossy()
            );
            ProbeError::from(e)
        })
        .wrap_err("Failed to parse filename to integer")
}

/// this struct represents a `<TID>/data` probe record directory.
pub struct ArenaContext(pub Vec<DataArena>);

impl ArenaContext {
    /// Resolve a raw arena pointer against every data arena, returning the first match.
    pub fn try_get_slice(&self, ptr: usize) -> Option<&[u8]> {
        for vec in self.0.iter() {
            if let Some(x) = vec.try_get_slice(ptr) {
                return Some(x);
            }
        }
        None
    }
}

/// This struct represents a single `data/*.dat` file from a probe record directory.
pub struct DataArena {
    header: ArenaHeader,
    raw: Vec<u8>,
}

impl DataArena {
    pub fn from_bytes(bytes: Vec<u8>, instantiation: usize) -> Result<Self> {
        let header = ArenaHeader::from_bytes(&bytes, instantiation)
            .wrap_err("Failed to create ArenaHeader for DataArena")?;

        Ok(Self { header, raw: bytes })
    }

    /// If `ptr` (an address from the recorded process) falls inside this arena's used range,
    /// return the byte slice from that address to the end of the used range.
    pub fn try_get_slice(&self, ptr: usize) -> Option<&[u8]> {
        let end = self.header.base_address + self.header.used;
        match ptr >= self.header.base_address && ptr <= end {
            false => None,
            true => Some(unsafe {
                // SAFETY: ptr is within [base_address, end], so the offset is within self.raw
                let new_ptr = self.raw.as_ptr().add(ptr - self.header.base_address);
                let len = end - ptr;

                core::slice::from_raw_parts(new_ptr, len)
            }),
        }
    }
}

/// This struct represents a single `ops/*.dat` file from a probe record directory.
pub struct OpsArena<'a> {
    // raw is needed even though it's unused since ops is a reference to it;
    // the compiler doesn't know this since it's constructed using unsafe code.
    #[allow(dead_code)]
    /// raw byte buffer of Ops arena allocator.
    raw: Vec<u8>,
    /// slice over Ops of the raw buffer.
    ops: &'a [C_Op],
}

impl<'a> OpsArena<'a> {
    pub fn from_bytes(bytes: Vec<u8>, instantiation: usize) -> Result<Self> {
        let header = ArenaHeader::from_bytes(&bytes, instantiation)
            .wrap_err("Failed to create ArenaHeader for OpsArena")?;

        if ((header.used - size_of::<ArenaHeader>()) % size_of::<C_Op>()) != 0 {
            return Err(ArenaError::Misaligned { size: header.used }.into());
        }

        let count = (header.used - size_of::<ArenaHeader>()) / size_of::<C_Op>();

        log::debug!("[unsafe] converting Vec<u8> to &[C_Op] of size {}", count);
        let ops = unsafe {
            // SAFETY: header validation guarantees `used` covers `count` whole C_Ops after the
            // header, and `bytes` is kept alive in `raw` for the lifetime of the slice.
            let ptr = bytes.as_ptr().add(size_of::<ArenaHeader>()) as *const C_Op;
            std::slice::from_raw_parts(ptr, count)
        };

        Ok(Self { raw: bytes, ops })
    }

    /// Convert every raw C Op in this arena into its owned rust form.
    pub fn decode(self, ctx: &ArenaContext) -> Result<Vec<ops::Op>> {
        self.ops
            .iter()
            .map(|x| ops::Op::ffi_from(x, ctx))
            .collect::<Result<Vec<_>>>()
            .wrap_err("Failed to decode arena ops")
    }
}

/// Arena allocator metadata placed at the beginning of arena files by libprobe.
#[repr(C)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ArenaHeader {
    instantiation: libc::size_t,
    base_address: libc::uintptr_t,
    capacity: libc::uintptr_t,
    used: libc::uintptr_t,
}

impl ArenaHeader {
    /// Parse the front of a raw byte buffer into a libprobe arena header
    fn from_bytes(bytes: &[u8], instantiation: usize) -> Result<Self> {
        let ptr = bytes as *const [u8] as *const Self;

        // length check must precede the unsafe reads below
        if bytes.len() < size_of::<Self>() {
            return Err(ArenaError::BufferTooSmall {
                got: bytes.len(),
                needed: size_of::<Self>(),
            }
            .into());
        }

        log::debug!("[unsafe] converting byte buffer into ArenaHeader");
        let header = unsafe {
            Self {
                instantiation: (*ptr).instantiation,
                base_address: (*ptr).base_address,
                capacity: (*ptr).capacity,
                used: (*ptr).used,
            }
        };
        log::debug!(
            "[unsafe] created ArenaHeader [ inst={}, base_addr={:#x}, capacity: {}, used={} ]",
            header.instantiation,
            header.base_address,
            header.capacity,
            header.used
        );

        if header.capacity != bytes.len() {
            return Err(ArenaError::InvalidCapacity {
                expected: header.capacity,
                actual: bytes.len(),
            }
            .into());
        }
        if header.used > header.capacity {
            return Err(ArenaError::InvalidSize {
                size: header.used,
                capacity: header.capacity,
            }
            .into());
        }

        if header.instantiation != instantiation {
            return Err(ArenaError::InstantiationMismatch {
                header: header.instantiation,
                passed: instantiation,
            }
            .into());
        }

        Ok(header)
    }
}

#[derive(Debug, thiserror::Error)]
pub enum ArenaError {
    /// Returned if [`ArenaHeader`] construction was attempted with a byte buffer smaller than an
    /// [`ArenaHeader`].
    #[error("Arena buffer too small, got {got}, minimum size {needed}")]
    BufferTooSmall { got: usize, needed: usize },

    /// Returned if the [`ArenaHeader`]'s capacity value doesn't match the size of the byte buffer.
+ #[error("Invalid arena capacity, expected {expected}, got {actual}")] + InvalidCapacity { expected: usize, actual: usize }, + + /// Returned if the [`ArenaHeader`]'s size value is larger than the capacity value. This + #[error("Arena size {size} is greater than capacity {capacity}")] + InvalidSize { size: usize, capacity: usize }, + + /// Returned if an [`OpsArena`]'s size isn't isn't `HEADER_SIZE + (N * OP_SIZE)` when `N` is + /// some integer. + #[error("Arena alignment error: arena size ({size}) minus header isn't a multiple of op size")] + Misaligned { size: usize }, + + /// Returned if the instantiation in a [`ArenaHeader`] doesn't match the indicated one + #[error("Header contained Instantiation ID {header}, but {passed} was indicated")] + InstantiationMismatch { header: usize, passed: usize }, +} diff --git a/probe_src/probe_frontend/macros/Cargo.toml b/probe_src/probe_frontend/macros/Cargo.toml new file mode 100644 index 00000000..4fad29d4 --- /dev/null +++ b/probe_src/probe_frontend/macros/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "probe_macros" +version.workspace = true +license.workspace = true +authors.workspace = true +publish.workspace = true +edition.workspace = true + +[lib] +name = "probe_macros" +proc-macro = true + +[dependencies] +parking_lot = "0.12.3" +proc-macro2 = "1.0.86" +quote = "1.0.36" +syn = "2.0.68" + +[lints] +workspace = true diff --git a/probe_src/probe_frontend/macros/src/lib.rs b/probe_src/probe_frontend/macros/src/lib.rs new file mode 100644 index 00000000..a7b57942 --- /dev/null +++ b/probe_src/probe_frontend/macros/src/lib.rs @@ -0,0 +1,342 @@ +use proc_macro::TokenStream; +use proc_macro2::Span; +use quote::{quote, quote_spanned}; +use syn::parse::Parse; +use syn::spanned::Spanned; +use syn::{parse_macro_input, Data, DeriveInput, Fields, Ident, Type}; +use syn::{parse_quote, LitStr, Token}; + +mod pygen; + +type MacroResult = Result; + +/// Generate a native rust struct from a rust-bindgen struct. 
+/// +/// In order to successfully generate a new struct, the struct it's invoked on must have the +/// following characteristics: +/// +/// - be a named struct (tuple and unit structs not supported). +/// - Name starts with `C_`. +/// - contain only types that implement `FfiFrom` (defined in probe_frontend, see ops module for +/// details). +/// +/// In will generate a struct with the following characteristics: +/// +/// - same name, but without the `C_` prefix, and converted from snake_case to PascalCase. +/// - any field in the original struct starting with `__` is ignored. +/// - any field in the original struct starting with `ru_`, `tv_`, or `stx_` will have that prefix +/// removed. +/// - derives serde's `Serialize`, `Deserialize` traits. +/// - contains a unit field `_type` that serializes to the struct's name. +/// - implements `FfiFrom` by calling it recursively on each field. +/// - derives [`PygenDataclass`]. +#[proc_macro_derive(MakeRustOp)] +pub fn make_rust_op(input: TokenStream) -> TokenStream { + let original_struct = parse_macro_input!(input as DeriveInput); + let DeriveInput { data, ident, .. } = original_struct.clone(); + + match data { + Data::Struct(data_struct) => { + let fields = match data_struct.fields { + Fields::Named(x) => x, + _ => { + return quote_spanned! { + original_struct.span() => + compile_error!("Unit and Tuple structs not supported"); + } + .into() + } + }; + + let pairs = match fields + .named + .iter() + .filter_map(|field| { + let ident = match field.ident.as_ref() { + Some(x) => x, + None => { + return Some(Err(quote_spanned! 
{ + field.ident.span() => + compile_error!("Field had no identifier"); + } + .into())) + } + }; + let ident_str = ident.to_string(); + for prefix in ["__spare", "__reserved"] { + if ident_str.starts_with(prefix) { + return None; + } + } + + let pair = convert_bindgen_type(&field.ty).map(|ty| (ident, ty)); + Some(pair) + }) + .collect::>>() + { + Ok(x) => x, + Err(e) => return e, + }; + + let field_idents = pairs.iter().map(|x| x.0).collect::>(); + + let field_idents_stripped = field_idents + .iter() + .map(|old| { + let span = old.span(); + let str = old.to_string(); + let mut slice = str.as_str(); + + for prefix in ["ru_", "tv_", "stx_"] { + if let Some(stripped) = str.strip_prefix(prefix) { + slice = stripped; + break; + } + } + + Ident::new(slice, span) + }) + .collect::>(); + + let field_types = pairs.into_iter().map(|x| x.1).collect::>(); + + let new_name = Ident::new( + &snake_case_to_pascal( + ident + .to_string() + .strip_prefix("C_") + .expect("struct name doesn't start with 'C_'"), + ), + Span::call_site(), + ); + + let msgs = field_idents + .iter() + .map(|field_ident| { + format!( + "Error calling ffi_into() on {} while creating {}", + field_ident, new_name + ) + }) + .collect::>(); + + let serialize_type_path = format!("{}::serialize_type", new_name); + let type_name = new_name.to_string(); + + // This is rather bad macro hygiene, but this macro is only intend for probe_frontend's + // op struct generation, so we're playing a little fast-n'-loose with scoping. + quote! 
{ + #[derive(Debug, Clone, Serialize, Deserialize, PygenDataclass)] + pub struct #new_name { + #(pub #field_idents_stripped: #field_types,)* + + /// this is a placeholder field that get's serialized as the type name + #[serde(serialize_with = #serialize_type_path)] + #[serde(skip_deserializing)] + pub _type: (), + } + + impl #new_name { + fn serialize_type( + _: &(), + serializer: S + ) -> std::result::Result { + serializer.serialize_str(#type_name) + } + } + + impl FfiFrom<#ident> for #new_name { + fn ffi_from(value: &#ident, ctx: &ArenaContext) -> Result { + Ok(Self { + _type: (), + #( + #field_idents_stripped: value.#field_idents + .ffi_into(ctx) + .map_err(|e| { + ProbeError::FFiConversionError { + msg: #msgs, + inner: Box::new(e), + } + })?, + )* + }) + } + } + } + .into() + } + _ => quote_spanned! { + original_struct.span() => + compile_error!("MakeRustOp only supports structs"); + } + .into(), + } +} + +fn convert_bindgen_type(ty: &syn::Type) -> MacroResult { + match ty { + syn::Type::Ptr(_inner) => Ok(parse_quote!(::std::ffi::CString)), + syn::Type::Array(inner) => { + let mut new = inner.clone(); + new.elem = Box::new(convert_bindgen_type(&new.elem)?); + Ok(Type::Array(new)) + } + syn::Type::Path(inner) => { + if let Some(name) = type_basename(inner)?.to_string().strip_prefix("C_") { + let name = snake_case_to_pascal(name); + let name = Ident::new(&name, Span::mixed_site()); + Ok(parse_quote!(#name)) + } else { + Ok(Type::Path(inner.clone())) + } + } + _ => Err(quote_spanned! { + ty.span() => + compile_error!("Unable to convert bindgen type"); + } + .into()), + } +} + +fn type_basename(ty: &syn::TypePath) -> MacroResult<&syn::Ident> { + if let Some(qself) = &ty.qself { + return Err(quote_spanned! { + qself.span() => + compile_error!("Qualified self types not supported"); + } + .into()); + } + + match ty.path.segments.last() { + Some(x) => Ok(&x.ident), + None => Err(quote_spanned! 
{ + ty.path.segments.span() => + compile_error!("Type path has no segments"); + } + .into()), + } +} + +fn snake_case_to_pascal(input: &str) -> String { + input + .chars() + .fold((true, String::new()), |(prior_underscore, mut acc), ch| { + if ch == '_' { + return (true, acc); + } else if prior_underscore { + ch.to_uppercase().for_each(|x| acc.push(x)) + } else { + acc.push(ch) + } + (false, acc) + }) + .1 +} + +/// Generate a python dataclass from a rust struct. +/// +/// In order to successfully generate a dataclass, the struct it's invoked on must have the +/// following characteristics: +/// +/// - be a named struct (tuple and unit structs not supported). +/// - OR be an enum with either named variants or tuple enums containing only one item. +/// - contain only primitives, [`CString`](std::ffi::CString)s, or other generated dataclasses. +/// - field with the unit type are also allowed, but they're ignored. +#[proc_macro_derive(PygenDataclass)] +pub fn pygen_dataclass(input: TokenStream) -> TokenStream { + let source = parse_macro_input!(input as DeriveInput); + match pygen::pygen_dataclass_internal(source) { + Ok(_) => TokenStream::new(), + Err(e) => e, + } +} + +/// write the generated python to a path contained in a environment variable. +#[proc_macro] +pub fn pygen_write_to_env(input: TokenStream) -> TokenStream { + let path = parse_macro_input!(input as syn::LitStr); + match pygen::pygen_write_internal(path) { + Ok(_) => TokenStream::new(), + Err(e) => e, + } +} + +/// add a property to a python dataclass with the following syntax: +/// +/// ``` +/// pygen_add_prop!(ClassName impl prop_name -> return_type: +/// "line1", +/// "return line2" +/// ... 
+/// ); +/// ``` +#[proc_macro] +pub fn pygen_add_prop(input: TokenStream) -> TokenStream { + let args = parse_macro_input!(input as AddPropArgs); + match pygen::pygen_add_prop_internal(args) { + Ok(_) => TokenStream::new(), + Err(e) => e, + } +} + +pub(crate) struct AddPropArgs { + class: Ident, + name: Ident, + ret: Ident, + body: Vec, +} + +impl Parse for AddPropArgs { + fn parse(input: syn::parse::ParseStream) -> syn::Result { + let class = input.parse()?; + input.parse::()?; + let name = input.parse()?; + input.parse::]>()?; + let ret = input.parse()?; + input.parse::()?; + + let mut body = vec![]; + body.push(input.parse::()?.value()); + while !input.is_empty() { + input.parse::()?; + if input.is_empty() { + break; + } + body.push(input.parse::()?.value()); + } + + Ok(Self { + class, + name, + ret, + body, + }) + } +} + +/// Add one or more lines to the generated python file, after the imports, but before any generated +/// class or enum. +#[proc_macro] +pub fn pygen_add_preamble(input: TokenStream) -> TokenStream { + let args = parse_macro_input!(input as AddPreambleArgs); + pygen::pygen_add_preamble(args); + TokenStream::new() +} + +pub(crate) struct AddPreambleArgs(pub Vec); + +impl Parse for AddPreambleArgs { + fn parse(input: syn::parse::ParseStream) -> syn::Result { + let mut lines = vec![]; + lines.push(input.parse::()?.value()); + while !input.is_empty() { + input.parse::()?; + if input.is_empty() { + break; + } + lines.push(input.parse::()?.value()); + } + + Ok(Self(lines)) + } +} diff --git a/probe_src/probe_frontend/macros/src/pygen.rs b/probe_src/probe_frontend/macros/src/pygen.rs new file mode 100644 index 00000000..6008b8f3 --- /dev/null +++ b/probe_src/probe_frontend/macros/src/pygen.rs @@ -0,0 +1,544 @@ +use parking_lot::RwLock; +use quote::quote_spanned; +use std::fmt::Display; +use std::fs::File; +use std::io::Write; +use std::sync::OnceLock; +use syn::{spanned::Spanned, Data, Fields}; + +use crate::MacroResult; + +fn pygen_file() -> 
&'static RwLock { + static INNER: OnceLock> = OnceLock::new(); + INNER.get_or_init(|| RwLock::new(PygenFile::new())) +} + +pub fn pygen_dataclass_internal(input: syn::DeriveInput) -> MacroResult<()> { + let syn::DeriveInput { data, ident, .. } = input.clone(); + + match data { + Data::Struct(data_struct) => { + let fields = match data_struct.fields { + Fields::Named(x) => x, + _ => { + return Err(quote_spanned! { + input.span() => + compile_error!("Unnamed and unit structs not implemented") + } + .into()) + } + }; + + let pairs = fields + .named + .iter() + .filter_map(|field| { + if let syn::Type::Tuple(syn::TypeTuple { elems, .. }) = &field.ty { + // this is the unit type, so we just skip it + if elems.is_empty() { + return None; + } + } + + let pair = + convert_to_pytype(&field.ty).and_then(|ty| match field.ident.as_ref() { + Some(ident) => Ok((ident.to_string(), ty)), + None => Err(quote_spanned! { + field.span() => + compile_error!("Field doesn't have identifier"); + } + .into()), + }); + + Some(pair) + }) + .collect::>>()?; + + let dataclass = basic_dataclass(ident.to_string(), &pairs); + pygen_file().write().classes.push(dataclass); + } + Data::Enum(data_enum) => { + let mut enu = Enum::new(ident.to_string()); + + // this is the types that the produced union is over + let mut variants = vec![]; + + for variant in data_enum.variants { + match variant.fields { + syn::Fields::Named(inner) => { + let name = variant.ident.to_string(); + + let pairs = inner + .named + .iter() + .filter_map(|field| { + // skip any field who's type is the unit type + if let syn::Type::Tuple(syn::TypeTuple { elems, .. }) = &field.ty { + if elems.is_empty() { + return None; + } + } + + let pair = convert_to_pytype(&field.ty).and_then(|ty| match field + .ident + .as_ref() + { + Some(ident) => Ok((ident.to_string(), ty)), + None => Err(quote_spanned! 
{ + field.span() => + compile_error!("Field doesn't have identifier"); + } + .into()), + }); + + Some(pair) + }) + .collect::>>()?; + + enu.add_variant_owned_class(basic_dataclass(name.clone(), &pairs)); + variants.push(name); + } + syn::Fields::Unnamed(inner) => { + let fields = inner.unnamed.iter().collect::>(); + if fields.len() != 1 { + return Err(quote_spanned! { + inner.span() => + compile_error!("Tuple enums of length != 1 not supported") + } + .into()); + } + enu.add_variant_ref(convert_to_pytype(&fields[0].ty)?); + } + syn::Fields::Unit => { + return Err(quote_spanned! { + variant.fields.span() => + compile_error!("Unit enum variants not supported") + } + .into()) + } + } + } + + pygen_file().write().enums.push(enu); + } + Data::Union(_data_union) => { + return Err(quote_spanned! { + input.span() => + compile_error!("Unions not supported") + } + .into()) + } + }; + + Ok(()) +} + +fn basic_dataclass(name: String, pairs: &[(String, String)]) -> Dataclass { + let mut dataclass = Dataclass::new(name); + + for (ident, ty) in pairs { + dataclass.add_item(DataclassItem::new(ident.clone(), ty.clone())); + } + + dataclass +} + +fn convert_to_pytype(ty: &syn::Type) -> MacroResult { + match ty { + syn::Type::Array(inner) => Ok(format!("list[{}]", convert_to_pytype(inner.elem.as_ref())?)), + syn::Type::Path(inner) => { + let name = crate::type_basename(inner)?.to_string(); + Ok(match name.as_str() { + // that's a lot of ways to say "int", python ints are bigints so we don't have to + // care about size + "TaskType" | "__dev_t" | "__gid_t" | "__ino_t" | "__mode_t" | "__s32" | "__s64" + | "__suseconds_t" | "__syscall_slong_t" | "__syseconds_t" | "__time_t" + | "__u16" | "__u32" | "__u64" | "__uid_t" | "c_int" | "c_long" | "c_uint" + | "c_ulong" | "dev_t" | "gid_t" | "i128" | "i16" | "i32" | "i64" | "i8" + | "ino_t" | "isize" | "mode_t" | "pid_t" | "pthread_t" | "thrd_t" | "u128" + | "u16" | "u32" | "u64" | "u8" | "uid_t" | "usize" => "int".to_owned(), + + // float, 
python uses doubles for everything + "f32" | "f64" => "float".to_owned(), + + // CStrings are serialized as an array of bytes, so it makes sense to load them + // into python as bytes + "CString" => "bytes".to_owned(), + + // bool types are basically the same everywhere + "bool" => name, + + _ => name, + }) + } + _ => Err(quote_spanned! { + ty.span() => + compile_error!("Unsupported type type"); + } + .into()), + } +} + +pub(crate) fn pygen_write_internal(path: syn::LitStr) -> MacroResult<()> { + let path_str = path.value(); + let path_str = match std::env::var_os(path_str) { + Some(x) => x, + None => { + return Err(quote_spanned! { + path.span() => + compile_error!("Environmnet variable not defined"); + } + .into()) + } + }; + + let mut file = match File::create(path_str) { + Ok(x) => x, + Err(e) => { + eprintln!("pygen IO error: {}", e); + return Err(quote_spanned! { + path.span() => + compile_error!("Failed to create pygen file"); + } + .into()); + } + }; + + pygen_file().write().prepend_preamble( + [ + "from __future__ import annotations", + "import typing", + "from dataclasses import dataclass\n", + ] + .into_iter() + .map(|x| x.to_owned()) + .collect(), + ); + + if let Err(e) = writeln!(file, "{}", pygen_file().read()) { + eprintln!("pygen IO error: {}", e); + return Err(quote_spanned! { + path.span() => + compile_error!("Failed to write pygen file"); + } + .into()); + } + + Ok(()) +} + +pub(crate) fn pygen_add_prop_internal(args: crate::AddPropArgs) -> MacroResult<()> { + let class = args.class.to_string(); + let mut prop = DataclassProp::new(args.name.to_string(), args.ret.to_string()); + args.body.into_iter().for_each(|x| prop.body.push(x)); + + let mut write_lock = pygen_file().write(); + + let dataclass = match write_lock + .classes + .iter_mut() + .find(|dataclass| dataclass.name == class) + { + Some(x) => x, + None => { + return Err(quote_spanned! 
{ + args.class.span() => + compile_error!("No such dataclass found"); + } + .into()) + } + }; + + dataclass.add_prop(prop); + + Ok(()) +} + +pub(crate) fn pygen_add_preamble(args: crate::AddPreambleArgs) { + pygen_file().write().append_preamble(args.0) +} + +#[derive(Debug, Clone)] +struct PygenFile { + preamble: Vec, + pub classes: Vec, + pub enums: Vec, +} + +#[derive(Debug, Clone)] +struct Enum { + indent: usize, + pub name: String, + variants_owned_class: Vec, + variants_owned_enum: Vec, + variants_ref: Vec, +} + +#[derive(Debug, Clone)] +struct Dataclass { + indent: usize, + pub name: String, + inclasses: Vec, + items: Vec, + properties: Vec, +} + +#[derive(Debug, Clone)] +struct DataclassItem { + indent: usize, + name: String, + ty: String, +} + +#[derive(Debug, Clone)] +struct DataclassProp { + indent: usize, + name: String, + ret: String, + pub body: Vec, +} + +#[allow(dead_code)] +impl PygenFile { + pub fn new() -> Self { + Self { + preamble: vec![], + classes: vec![], + enums: vec![], + } + } + + pub fn prepend_preamble(&mut self, mut lines: Vec) { + lines.extend(std::mem::take(&mut self.preamble)); + self.preamble = lines; + } + + pub fn append_preamble(&mut self, lines: Vec) { + self.preamble.extend(lines); + } +} + +#[allow(dead_code)] +impl Enum { + pub fn new(name: String) -> Self { + Self { + indent: 0, + name, + variants_owned_class: vec![], + variants_owned_enum: vec![], + variants_ref: vec![], + } + } + + pub fn add_variant_owned_class(&mut self, mut item: Dataclass) { + item.set_indent(self.indent); + self.variants_owned_class.push(item); + } + + pub fn add_variant_owned_enum(&mut self, mut item: Enum) { + item.set_indent(self.indent); + self.variants_owned_enum.push(item); + } + + pub fn add_variant_ref(&mut self, item: String) { + self.variants_ref.push(item); + } + + pub fn set_indent(&mut self, indent: usize) { + for class in &mut self.variants_owned_class { + class.set_indent(indent); + } + for enu in &mut self.variants_owned_enum { + 
enu.set_indent(indent); + } + + self.indent = indent; + } +} + +#[allow(dead_code)] +impl Dataclass { + pub fn new(name: String) -> Self { + Self { + indent: 0, + name, + inclasses: vec![], + items: vec![], + properties: vec![], + } + } + + pub fn add_inclass(&mut self, mut inclass: Dataclass) { + inclass.set_indent(self.indent + 4); + self.inclasses.push(inclass) + } + + pub fn add_item(&mut self, mut item: DataclassItem) { + item.set_indent(self.indent + 4); + self.items.push(item) + } + + pub fn add_prop(&mut self, mut prop: DataclassProp) { + prop.set_indent(self.indent + 4); + self.properties.push(prop) + } + + pub fn set_indent(&mut self, indent: usize) { + for inclass in &mut self.inclasses { + inclass.set_indent(indent + 4); + } + for item in &mut self.items { + item.set_indent(indent + 4); + } + + self.indent = indent; + } +} + +impl DataclassItem { + pub fn new(name: String, ty: String) -> Self { + Self { + indent: 0, + name, + ty, + } + } + + pub fn set_indent(&mut self, indent: usize) { + self.indent = indent; + } +} + +impl DataclassProp { + pub fn new(name: String, ret: String) -> Self { + Self { + indent: 0, + name, + ret, + body: vec![], + } + } + + pub fn set_indent(&mut self, indent: usize) { + self.indent = indent; + } +} + +// Display trait implementations for actual codegen + +impl Display for PygenFile { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + writeln!(f, "# This file was @generated by probe_macros")?; + + for line in self.preamble.iter() { + writeln!(f, "{line}")?; + } + writeln!(f)?; + + for class in self.classes.iter() { + writeln!(f, "{class}")?; + } + + for enu in self.enums.iter() { + writeln!(f, "{enu}")?; + } + + Ok(()) + } +} + +impl Display for Enum { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + fn print_union_type(types: &[&str], f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + if types.is_empty() { + write!(f, "None")?; + return Ok(()); + } + let mut iter = 
types.iter(); + + // unwrap allowed because we checked that types isn't empty + let first = iter.next().unwrap(); + write!(f, "{first}")?; + + for ty in iter { + write!(f, " | {ty}")?; + } + + Ok(()) + } + + let name = &self.name; + let mut acc = Vec::new(); + + for owned_variant in self.variants_owned_class.iter() { + writeln!(f, "{owned_variant}")?; + acc.push(owned_variant.name.as_str()); + } + + for owned_variant in self.variants_owned_enum.iter() { + writeln!(f, "{owned_variant}")?; + acc.push(owned_variant.name.as_str()); + } + + self.variants_ref.iter().for_each(|x| acc.push(x)); + + let indent_str = " ".repeat(self.indent); + write!(f, "{indent_str}{name}: typing.TypeAlias = ")?; + print_union_type(acc.as_slice(), f) + } +} + +impl Display for Dataclass { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let name = self.name.as_str(); + let indent_str = " ".repeat(self.indent); + + writeln!( + f, + "{indent_str}@dataclass(init=True, frozen=True)\n\ + {indent_str}class {name}:" + )?; + + for inclass in &self.inclasses { + writeln!(f, "{inclass}",)?; + } + + for item in &self.items { + writeln!(f, "{item}")?; + } + writeln!(f)?; + + for prop in &self.properties { + writeln!(f, "{prop}")?; + } + + Ok(()) + } +} + +impl Display for DataclassItem { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let &Self { name, ty, .. } = &self; + let indent_str = " ".repeat(self.indent); + write!(f, "{indent_str}{name}: {ty}") + } +} + +impl Display for DataclassProp { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let &Self { name, ret, .. 
} = &self; + let indent_str = " ".repeat(self.indent); + + writeln!( + f, + "{indent_str}@property\n\ + {indent_str}def {name}(self) -> {ret}:", + )?; + + for line in &self.body { + writeln!(f, "{indent_str} {line}")?; + } + + Ok(()) + } +} diff --git a/probe_src/probe_frontend/python/probe_py/generated/__init__.py b/probe_src/probe_frontend/python/probe_py/generated/__init__.py new file mode 100644 index 00000000..9f8e34d3 --- /dev/null +++ b/probe_src/probe_frontend/python/probe_py/generated/__init__.py @@ -0,0 +1,7 @@ +""" +Generated code for reading with PROBE logs. + +See https://github.com/charmoniumQ/PROBE +""" + +__version__ = "@version@" diff --git a/probe_src/probe_frontend/python/probe_py/generated/ops.py b/probe_src/probe_frontend/python/probe_py/generated/ops.py new file mode 100644 index 00000000..03bed957 --- /dev/null +++ b/probe_src/probe_frontend/python/probe_py/generated/ops.py @@ -0,0 +1,233 @@ +# This file was @generated by probe_macros +from __future__ import annotations +import typing +from dataclasses import dataclass + +# https://github.com/torvalds/linux/blob/73e931504f8e0d42978bfcda37b323dbbd1afc08/include/uapi/linux/fcntl.h#L98 +AT_FDCWD: int = -100 + +@dataclass(init=True, frozen=True) +class Timespec: + sec: int + nsec: int + + +@dataclass(init=True, frozen=True) +class StatxTimestamp: + sec: int + nsec: int + + +@dataclass(init=True, frozen=True) +class Statx: + mask: int + blksize: int + attributes: int + nlink: int + uid: int + gid: int + mode: int + ino: int + size: int + blocks: int + attributes_mask: int + atime: StatxTimestamp + btime: StatxTimestamp + ctime: StatxTimestamp + mtime: StatxTimestamp + rdev_major: int + rdev_minor: int + dev_major: int + dev_minor: int + mnt_id: int + dio_mem_align: int + dio_offset_align: int + + +@dataclass(init=True, frozen=True) +class Timeval: + sec: int + usec: int + + +@dataclass(init=True, frozen=True) +class Rusage: + utime: Timeval + stime: Timeval + maxrss: int + ixrss: int + idrss: int 
+ isrss: int + minflt: int + majflt: int + nswap: int + inblock: int + oublock: int + msgsnd: int + msgrcv: int + nsignals: int + nvcsw: int + nivcsw: int + + +@dataclass(init=True, frozen=True) +class Path: + dirfd_minus_at_fdcwd: int + path: bytes + device_major: int + device_minor: int + inode: int + mtime: StatxTimestamp + ctime: StatxTimestamp + stat_valid: bool + dirfd_valid: bool + + @property + def dirfd(self) -> int: + return self.dirfd_minus_at_fdcwd + AT_FDCWD + + +@dataclass(init=True, frozen=True) +class InitProcessOp: + pid: int + + +@dataclass(init=True, frozen=True) +class InitExecEpochOp: + epoch: int + program_name: bytes + + +@dataclass(init=True, frozen=True) +class InitThreadOp: + tid: int + + +@dataclass(init=True, frozen=True) +class OpenOp: + path: Path + flags: int + mode: int + fd: int + ferrno: int + + +@dataclass(init=True, frozen=True) +class CloseOp: + low_fd: int + high_fd: int + ferrno: int + + +@dataclass(init=True, frozen=True) +class ChdirOp: + path: Path + ferrno: int + + +@dataclass(init=True, frozen=True) +class ExecOp: + path: Path + ferrno: int + + +@dataclass(init=True, frozen=True) +class CloneOp: + flags: int + run_pthread_atfork_handlers: bool + task_type: int + task_id: int + ferrno: int + + +@dataclass(init=True, frozen=True) +class ExitOp: + status: int + run_atexit_handlers: bool + + +@dataclass(init=True, frozen=True) +class AccessOp: + path: Path + mode: int + flags: int + ferrno: int + + +@dataclass(init=True, frozen=True) +class StatOp: + path: Path + flags: int + statx_buf: Statx + ferrno: int + + +@dataclass(init=True, frozen=True) +class ReaddirOp: + dir: Path + child: bytes + all_children: bool + ferrno: int + + +@dataclass(init=True, frozen=True) +class WaitOp: + task_type: int + task_id: int + options: int + status: int + ferrno: int + + +@dataclass(init=True, frozen=True) +class GetRUsageOp: + waitpid_arg: int + getrusage_arg: int + usage: Rusage + ferrno: int + + +@dataclass(init=True, frozen=True) +class 
ReadLinkOp: + path: Path + resolved: bytes + ferrno: int + + +@dataclass(init=True, frozen=True) +class UpdateMetadataOp: + path: Path + flags: int + metadata: Metadata + ferrno: int + + +@dataclass(init=True, frozen=True) +class Op: + data: OpInternal + time: Timespec + pthread_id: int + iso_c_thread_id: int + + +@dataclass(init=True, frozen=True) +class Mode: + mode: int + + +@dataclass(init=True, frozen=True) +class Ownership: + uid: int + gid: int + + +@dataclass(init=True, frozen=True) +class Times: + is_null: bool + atime: Timeval + mtime: Timeval + + +Metadata: typing.TypeAlias = Mode | Ownership | Times +OpInternal: typing.TypeAlias = InitProcessOp | InitExecEpochOp | InitThreadOp | OpenOp | CloseOp | ChdirOp | ExecOp | CloneOp | ExitOp | AccessOp | StatOp | ReaddirOp | WaitOp | GetRUsageOp | UpdateMetadataOp | ReadLinkOp + diff --git a/probe_src/probe_frontend/python/probe_py/generated/probe.py b/probe_src/probe_frontend/python/probe_py/generated/probe.py new file mode 100644 index 00000000..a4bd52cc --- /dev/null +++ b/probe_src/probe_frontend/python/probe_py/generated/probe.py @@ -0,0 +1,84 @@ + +import typing +import json +import tarfile +from dataclasses import dataclass +from . 
import ops


@dataclass(frozen=True)
class ThreadProvLog:
    """Operations recorded by one thread (tid) within an exec epoch."""
    tid: int
    ops: typing.Sequence[ops.Op]


@dataclass(frozen=True)
class ExecEpochProvLog:
    """All threads recorded during one exec epoch of a process."""
    epoch: int
    threads: typing.Mapping[int, ThreadProvLog]


@dataclass(frozen=True)
class ProcessProvLog:
    """All exec epochs recorded for one process (pid)."""
    pid: int
    exec_epochs: typing.Mapping[int, ExecEpochProvLog]


@dataclass(frozen=True)
class ProvLog:
    """A complete provenance log: every recorded process, keyed by pid."""
    processes: typing.Mapping[int, ProcessProvLog]


def load_log(path: str) -> ProvLog:
    """Load a PROBE provenance log from the tar archive at ``path``.

    The archive contains one jsonlines file per thread, laid out as
    ``<pid>/<epoch>/<tid>``; each line deserializes into an ``ops.Op``.

    Raises:
        ValueError: if a member's path does not have exactly 3 components.
        IOError: if a member's contents cannot be extracted.
    """
    op_map: typing.Dict[int, typing.Dict[int, typing.Dict[int, ThreadProvLog]]] = {}

    # `with` ensures the archive handle is closed even if parsing fails
    # part-way through (the original leaked it on every exception path).
    with tarfile.open(path, mode='r') as tar:
        for item in tar:
            # items with size zero are directories in the tarball
            if item.size == 0:
                continue

            # extract and name the hierarchy components
            parts = item.name.split("/")
            if len(parts) != 3:
                raise ValueError("malformed probe log")
            pid: int = int(parts[0])
            epoch: int = int(parts[1])
            tid: int = int(parts[2])

            # ensure necessary dict objects have been created
            op_map.setdefault(pid, {}).setdefault(epoch, {})

            # extract file contents as byte buffer
            file = tar.extractfile(item)
            if file is None:
                raise IOError("Unable to read jsonlines from probe log")

            # read, split, comprehend, deserialize, extend
            # (named thread_log, not "ops", to avoid shadowing the ops module)
            jsonlines = file.read().strip().split(b"\n")
            thread_log = ThreadProvLog(
                tid, [json.loads(x, object_hook=op_hook) for x in jsonlines]
            )
            op_map[pid][epoch][tid] = thread_log

    return ProvLog({
        pid: ProcessProvLog(
            pid,
            {
                epoch: ExecEpochProvLog(epoch, threads)
                for epoch, threads in epochs.items()
            },
        )
        for pid, epochs in op_map.items()
    })


def op_hook(json_map: typing.Dict[str, typing.Any]):
    """``json.loads`` object hook that builds the dataclass named by ``_type``.

    The serializer emits a ``_type`` key holding the class name; look that
    class up in the generated ``ops`` module and construct it from the
    remaining keys.  Fields annotated ``bytes`` arrive as JSON arrays of
    ints (CStrings are serialized as byte arrays), so coerce them back.
    """
    ty: str = json_map.pop("_type")

    constructor = ops.__dict__[ty]

    # annotation values are strings here because ops.py uses
    # `from __future__ import annotations`
    for ident, anno in constructor.__annotations__.items():
        if anno == "bytes" and ident in json_map:
            json_map[ident] = bytes(json_map[ident])

    return constructor(**json_map)
a/probe_src/probe_frontend/python/pyproject.toml b/probe_src/probe_frontend/python/pyproject.toml new file mode 100644 index 00000000..2be4c8d5 --- /dev/null +++ b/probe_src/probe_frontend/python/pyproject.toml @@ -0,0 +1,15 @@ +[build-system] +requires = ["flit_core >=3.2,<4"] +build-backend = "flit_core.buildapi" + +[project] +name = "probe_py.generated" +authors = [ + # authors generated from Cargo.toml@authors@: +] +license = {file = "LICENSE"} +classifiers = ["License :: OSI Approved :: MIT License"] +dynamic = ["version", "description"] + +[project.urls] +Home = "https://github.com/charmoniumQ/PROBE"