diff --git a/docs/webhook_events.md b/docs/webhook_events.md index 85d8889607..554d1144cd 100644 --- a/docs/webhook_events.md +++ b/docs/webhook_events.md @@ -416,6 +416,10 @@ If webhook is set to have Event Grid message format then the payload will look a "title": "Expect Crash On Failure", "type": "boolean" }, + "function_allowlist": { + "title": "Function Allowlist", + "type": "string" + }, "generator_env": { "additionalProperties": { "type": "string" @@ -438,6 +442,10 @@ If webhook is set to have Event Grid message format then the payload will look a "title": "Minimized Stack Depth", "type": "integer" }, + "module_allowlist": { + "title": "Module Allowlist", + "type": "string" + }, "preserve_existing_outputs": { "title": "Preserve Existing Outputs", "type": "boolean" @@ -457,6 +465,10 @@ If webhook is set to have Event Grid message format then the payload will look a "title": "Report List", "type": "array" }, + "source_allowlist": { + "title": "Source Allowlist", + "type": "string" + }, "stats_file": { "title": "Stats File", "type": "string" @@ -2323,6 +2335,10 @@ If webhook is set to have Event Grid message format then the payload will look a "title": "Expect Crash On Failure", "type": "boolean" }, + "function_allowlist": { + "title": "Function Allowlist", + "type": "string" + }, "generator_env": { "additionalProperties": { "type": "string" @@ -2345,6 +2361,10 @@ If webhook is set to have Event Grid message format then the payload will look a "title": "Minimized Stack Depth", "type": "integer" }, + "module_allowlist": { + "title": "Module Allowlist", + "type": "string" + }, "preserve_existing_outputs": { "title": "Preserve Existing Outputs", "type": "boolean" @@ -2364,6 +2384,10 @@ If webhook is set to have Event Grid message format then the payload will look a "title": "Report List", "type": "array" }, + "source_allowlist": { + "title": "Source Allowlist", + "type": "string" + }, "stats_file": { "title": "Stats File", "type": "string" @@ -3051,6 +3075,10 @@ 
If webhook is set to have Event Grid message format then the payload will look a "title": "Expect Crash On Failure", "type": "boolean" }, + "function_allowlist": { + "title": "Function Allowlist", + "type": "string" + }, "generator_env": { "additionalProperties": { "type": "string" @@ -3073,6 +3101,10 @@ If webhook is set to have Event Grid message format then the payload will look a "title": "Minimized Stack Depth", "type": "integer" }, + "module_allowlist": { + "title": "Module Allowlist", + "type": "string" + }, "preserve_existing_outputs": { "title": "Preserve Existing Outputs", "type": "boolean" @@ -3092,6 +3124,10 @@ If webhook is set to have Event Grid message format then the payload will look a "title": "Report List", "type": "array" }, + "source_allowlist": { + "title": "Source Allowlist", + "type": "string" + }, "stats_file": { "title": "Stats File", "type": "string" @@ -3570,6 +3606,10 @@ If webhook is set to have Event Grid message format then the payload will look a "title": "Expect Crash On Failure", "type": "boolean" }, + "function_allowlist": { + "title": "Function Allowlist", + "type": "string" + }, "generator_env": { "additionalProperties": { "type": "string" @@ -3592,6 +3632,10 @@ If webhook is set to have Event Grid message format then the payload will look a "title": "Minimized Stack Depth", "type": "integer" }, + "module_allowlist": { + "title": "Module Allowlist", + "type": "string" + }, "preserve_existing_outputs": { "title": "Preserve Existing Outputs", "type": "boolean" @@ -3611,6 +3655,10 @@ If webhook is set to have Event Grid message format then the payload will look a "title": "Report List", "type": "array" }, + "source_allowlist": { + "title": "Source Allowlist", + "type": "string" + }, "stats_file": { "title": "Stats File", "type": "string" @@ -4032,6 +4080,10 @@ If webhook is set to have Event Grid message format then the payload will look a "title": "Expect Crash On Failure", "type": "boolean" }, + "function_allowlist": { + 
"title": "Function Allowlist", + "type": "string" + }, "generator_env": { "additionalProperties": { "type": "string" @@ -4054,6 +4106,10 @@ If webhook is set to have Event Grid message format then the payload will look a "title": "Minimized Stack Depth", "type": "integer" }, + "module_allowlist": { + "title": "Module Allowlist", + "type": "string" + }, "preserve_existing_outputs": { "title": "Preserve Existing Outputs", "type": "boolean" @@ -4073,6 +4129,10 @@ If webhook is set to have Event Grid message format then the payload will look a "title": "Report List", "type": "array" }, + "source_allowlist": { + "title": "Source Allowlist", + "type": "string" + }, "stats_file": { "title": "Stats File", "type": "string" @@ -4468,6 +4528,10 @@ If webhook is set to have Event Grid message format then the payload will look a "title": "Expect Crash On Failure", "type": "boolean" }, + "function_allowlist": { + "title": "Function Allowlist", + "type": "string" + }, "generator_env": { "additionalProperties": { "type": "string" @@ -4490,6 +4554,10 @@ If webhook is set to have Event Grid message format then the payload will look a "title": "Minimized Stack Depth", "type": "integer" }, + "module_allowlist": { + "title": "Module Allowlist", + "type": "string" + }, "preserve_existing_outputs": { "title": "Preserve Existing Outputs", "type": "boolean" @@ -4509,6 +4577,10 @@ If webhook is set to have Event Grid message format then the payload will look a "title": "Report List", "type": "array" }, + "source_allowlist": { + "title": "Source Allowlist", + "type": "string" + }, "stats_file": { "title": "Stats File", "type": "string" @@ -4931,6 +5003,10 @@ If webhook is set to have Event Grid message format then the payload will look a "title": "Expect Crash On Failure", "type": "boolean" }, + "function_allowlist": { + "title": "Function Allowlist", + "type": "string" + }, "generator_env": { "additionalProperties": { "type": "string" @@ -4953,6 +5029,10 @@ If webhook is set to have Event 
Grid message format then the payload will look a "title": "Minimized Stack Depth", "type": "integer" }, + "module_allowlist": { + "title": "Module Allowlist", + "type": "string" + }, "preserve_existing_outputs": { "title": "Preserve Existing Outputs", "type": "boolean" @@ -4972,6 +5052,10 @@ If webhook is set to have Event Grid message format then the payload will look a "title": "Report List", "type": "array" }, + "source_allowlist": { + "title": "Source Allowlist", + "type": "string" + }, "stats_file": { "title": "Stats File", "type": "string" @@ -6678,6 +6762,10 @@ If webhook is set to have Event Grid message format then the payload will look a "title": "Expect Crash On Failure", "type": "boolean" }, + "function_allowlist": { + "title": "Function Allowlist", + "type": "string" + }, "generator_env": { "additionalProperties": { "type": "string" @@ -6700,6 +6788,10 @@ If webhook is set to have Event Grid message format then the payload will look a "title": "Minimized Stack Depth", "type": "integer" }, + "module_allowlist": { + "title": "Module Allowlist", + "type": "string" + }, "preserve_existing_outputs": { "title": "Preserve Existing Outputs", "type": "boolean" @@ -6719,6 +6811,10 @@ If webhook is set to have Event Grid message format then the payload will look a "title": "Report List", "type": "array" }, + "source_allowlist": { + "title": "Source Allowlist", + "type": "string" + }, "stats_file": { "title": "Stats File", "type": "string" diff --git a/src/ApiService/ApiService/OneFuzzTypes/Enums.cs b/src/ApiService/ApiService/OneFuzzTypes/Enums.cs index d0498cdbe0..9843a564d9 100644 --- a/src/ApiService/ApiService/OneFuzzTypes/Enums.cs +++ b/src/ApiService/ApiService/OneFuzzTypes/Enums.cs @@ -274,6 +274,9 @@ public enum TaskFeature { ReportList, MinimizedStackDepth, CoverageFilter, + FunctionAllowlist, + ModuleAllowlist, + SourceAllowlist, TargetMustUseInput, TargetAssembly, TargetClass, diff --git a/src/ApiService/ApiService/OneFuzzTypes/Model.cs 
b/src/ApiService/ApiService/OneFuzzTypes/Model.cs index 3d3db31ba6..a2f4416bc4 100644 --- a/src/ApiService/ApiService/OneFuzzTypes/Model.cs +++ b/src/ApiService/ApiService/OneFuzzTypes/Model.cs @@ -208,7 +208,13 @@ public record TaskDetails( bool? PreserveExistingOutputs = null, List? ReportList = null, long? MinimizedStackDepth = null, + + // Deprecated. Retained for processing old table data. string? CoverageFilter = null, + + string? FunctionAllowlist = null, + string? ModuleAllowlist = null, + string? SourceAllowlist = null, string? TargetAssembly = null, string? TargetClass = null, string? TargetMethod = null @@ -977,7 +983,13 @@ Uri HeartbeatQueue public long? EnsembleSyncDelay { get; set; } public List? ReportList { get; set; } public long? MinimizedStackDepth { get; set; } + + // Deprecated. Retained for processing old table data. public string? CoverageFilter { get; set; } + + public string? FunctionAllowlist { get; set; } + public string? ModuleAllowlist { get; set; } + public string? SourceAllowlist { get; set; } public string? TargetAssembly { get; set; } public string? TargetClass { get; set; } public string? 
TargetMethod { get; set; } diff --git a/src/ApiService/ApiService/onefuzzlib/Config.cs b/src/ApiService/ApiService/onefuzzlib/Config.cs index be5fea0fc1..2a28a35b34 100644 --- a/src/ApiService/ApiService/onefuzzlib/Config.cs +++ b/src/ApiService/ApiService/onefuzzlib/Config.cs @@ -262,6 +262,24 @@ await _containers.GetContainerSasUrl(x.Item2.Name, StorageType.Corpus, ConvertPe } } + if (definition.Features.Contains(TaskFeature.FunctionAllowlist)) { + if (task.Config.Task.FunctionAllowlist != null) { + config.FunctionAllowlist = task.Config.Task.FunctionAllowlist; + } + } + + if (definition.Features.Contains(TaskFeature.ModuleAllowlist)) { + if (task.Config.Task.ModuleAllowlist != null) { + config.ModuleAllowlist = task.Config.Task.ModuleAllowlist; + } + } + + if (definition.Features.Contains(TaskFeature.SourceAllowlist)) { + if (task.Config.Task.SourceAllowlist != null) { + config.SourceAllowlist = task.Config.Task.SourceAllowlist; + } + } + if (definition.Features.Contains(TaskFeature.TargetAssembly)) { config.TargetAssembly = task.Config.Task.TargetAssembly; } diff --git a/src/ApiService/ApiService/onefuzzlib/Defs.cs b/src/ApiService/ApiService/onefuzzlib/Defs.cs index b1692094f4..4a182ff0ac 100644 --- a/src/ApiService/ApiService/onefuzzlib/Defs.cs +++ b/src/ApiService/ApiService/onefuzzlib/Defs.cs @@ -10,8 +10,14 @@ public static class Defs { TaskFeature.TargetEnv, TaskFeature.TargetOptions, TaskFeature.TargetTimeout, - TaskFeature.CoverageFilter, TaskFeature.TargetMustUseInput, + + // Deprecated. Retained for processing old table data. 
+ TaskFeature.CoverageFilter, + + TaskFeature.FunctionAllowlist, + TaskFeature.ModuleAllowlist, + TaskFeature.SourceAllowlist, }, Vm: new VmDefinition(Compare: Compare.Equal, Value:1), Containers: new [] { diff --git a/src/agent/Cargo.lock b/src/agent/Cargo.lock index bf4cf7cf3a..0458302b07 100644 --- a/src/agent/Cargo.lock +++ b/src/agent/Cargo.lock @@ -326,15 +326,6 @@ dependencies = [ "generic-array", ] -[[package]] -name = "brownstone" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "030ea61398f34f1395ccbeb046fb68c87b631d1f34567fed0f0f11fa35d18d8d" -dependencies = [ - "arrayvec 0.7.2", -] - [[package]] name = "brownstone" version = "3.0.0" @@ -579,42 +570,10 @@ dependencies = [ "pretty_assertions", "procfs", "regex", - "symbolic 10.2.0", + "symbolic", "thiserror", ] -[[package]] -name = "coverage-legacy" -version = "0.1.0" -dependencies = [ - "anyhow", - "bincode", - "cpp_demangle 0.3.5", - "debugger", - "dunce", - "env_logger 0.9.0", - "fixedbitset", - "goblin 0.5.1", - "iced-x86", - "log", - "memmap2", - "msvc-demangler", - "pdb 0.7.0", - "pete", - "pretty_assertions", - "procfs", - "quick-xml", - "regex", - "rustc-demangle", - "serde", - "serde_json", - "structopt", - "symbolic 8.8.0", - "uuid 0.8.2", - "win-util", - "winapi", -] - [[package]] name = "cpp_demangle" version = "0.3.5" @@ -800,9 +759,9 @@ dependencies = [ "goblin 0.6.0", "iced-x86", "log", - "pdb 0.8.0", + "pdb", "regex", - "symbolic 10.2.0", + "symbolic", "thiserror", ] @@ -822,15 +781,6 @@ dependencies = [ "winapi", ] -[[package]] -name = "debugid" -version = "0.7.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f91cf5a8c2f2097e2a32627123508635d47ce10563d999ec1a95addf08b502ba" -dependencies = [ - "uuid 0.8.2", -] - [[package]] name = "debugid" version = "0.8.0" @@ -918,16 +868,6 @@ version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" -[[package]] -name = "elementtree" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f6319c9433cf1e95c60c8533978bccf0614f27f03bb4e514253468eeeaa7fe3" -dependencies = [ - "string_cache", - "xml-rs", -] - [[package]] name = "elementtree" version = "1.2.2" @@ -1048,12 +988,6 @@ dependencies = [ "winapi", ] -[[package]] -name = "fixedbitset" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "398ea4fabe40b9b0d885340a2a991a44c8a645624075ad966d21f88688e2b69e" - [[package]] name = "flate2" version = "1.0.24" @@ -1331,7 +1265,7 @@ checksum = "c955ab4e0ad8c843ea653a3d143048b87490d9be56bd7132a435c2407846ac8f" dependencies = [ "log", "plain", - "scroll 0.11.0", + "scroll", ] [[package]] @@ -1342,7 +1276,7 @@ checksum = "572564d6cba7d09775202c8e7eebc4d534d5ae36578ab402fb21e182a0ac9505" dependencies = [ "log", "plain", - "scroll 0.11.0", + "scroll", ] [[package]] @@ -2003,26 +1937,13 @@ dependencies = [ "version_check", ] -[[package]] -name = "nom-supreme" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aadc66631948f6b65da03be4c4cd8bd104d481697ecbb9bbd65719b1ec60bc9f" -dependencies = [ - "brownstone 1.1.0", - "indent_write", - "joinery", - "memchr", - "nom 7.1.0", -] - [[package]] name = "nom-supreme" version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2bd3ae6c901f1959588759ff51c95d24b491ecb9ff91aa9c2ef4acc5b1dcab27" dependencies = [ - "brownstone 3.0.0", + "brownstone", "indent_write", "joinery", "memchr", @@ -2215,7 +2136,8 @@ dependencies = [ "backoff", "chrono", "clap 2.34.0", - "coverage-legacy", + "cobertura", + "coverage", "crossterm 0.22.1", "env_logger 0.9.0", "flume", @@ -2225,6 +2147,7 @@ dependencies = [ "log", "num_cpus", "onefuzz", + "onefuzz-file-format", "onefuzz-telemetry", "path-absolutize", 
"pretty_assertions", @@ -2422,17 +2345,6 @@ dependencies = [ "once_cell", ] -[[package]] -name = "pdb" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13f4d162ecaaa1467de5afbe62d597757b674b51da8bb4e587430c5fdb2af7aa" -dependencies = [ - "fallible-iterator", - "scroll 0.10.2", - "uuid 0.8.2", -] - [[package]] name = "pdb" version = "0.8.0" @@ -2440,7 +2352,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "82040a392923abe6279c00ab4aff62d5250d1c8555dc780e4b02783a7aa74863" dependencies = [ "fallible-iterator", - "scroll 0.11.0", + "scroll", "uuid 1.2.1", ] @@ -2453,7 +2365,7 @@ dependencies = [ "bitflags", "elsa", "maybe-owned", - "pdb 0.8.0", + "pdb", "range-collections", "thiserror", ] @@ -2974,12 +2886,6 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" -[[package]] -name = "scroll" -version = "0.10.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fda28d4b4830b807a8b43f7b0e6b5df875311b3e7621d84577188c175b6ec1ec" - [[package]] name = "scroll" version = "0.11.0" @@ -3230,7 +3136,7 @@ dependencies = [ "env_logger 0.9.0", "log", "nom 7.1.0", - "pdb 0.8.0", + "pdb", "quick-xml", "regex", "serde", @@ -3363,40 +3269,16 @@ version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601" -[[package]] -name = "symbolic" -version = "8.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b49345d083b1103e25c8c10e5e52cff254d33e70e29307c2bc4777074a25258" -dependencies = [ - "symbolic-common 8.8.0", - "symbolic-debuginfo 8.8.0", - "symbolic-demangle 8.8.0", - "symbolic-symcache 8.8.0", -] - [[package]] name = "symbolic" version = "10.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"27ac8ad1ebe348393d71802e8b0f5084c51fde21ad4c29ba8f8fb4d7ad6ed671" dependencies = [ - "symbolic-common 10.2.1", - "symbolic-debuginfo 10.2.0", - "symbolic-demangle 10.2.1", - "symbolic-symcache 10.2.0", -] - -[[package]] -name = "symbolic-common" -version = "8.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f551f902d5642e58039aee6a9021a61037926af96e071816361644983966f540" -dependencies = [ - "debugid 0.7.2", - "memmap2", - "stable_deref_trait", - "uuid 0.8.2", + "symbolic-common", + "symbolic-debuginfo", + "symbolic-demangle", + "symbolic-symcache", ] [[package]] @@ -3405,42 +3287,12 @@ version = "10.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b55cdc318ede251d0957f07afe5fed912119b8c1bc5a7804151826db999e737" dependencies = [ - "debugid 0.8.0", + "debugid", "memmap2", "stable_deref_trait", "uuid 1.2.1", ] -[[package]] -name = "symbolic-debuginfo" -version = "8.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1165dabf9fc1d6bb6819c2c0e27c8dd0e3068d2c53cf186d319788e96517f0d6" -dependencies = [ - "bitvec 1.0.0", - "dmsort", - "elementtree 0.7.0", - "fallible-iterator", - "flate2", - "gimli", - "goblin 0.5.1", - "lazy_static", - "lazycell", - "nom 7.1.0", - "nom-supreme 0.6.0", - "parking_lot 0.12.1", - "pdb 0.7.0", - "regex", - "scroll 0.11.0", - "serde", - "serde_json", - "smallvec", - "symbolic-common 8.8.0", - "thiserror", - "wasmparser 0.83.0", - "zip 0.5.13", -] - [[package]] name = "symbolic-debuginfo" version = "10.2.0" @@ -3449,7 +3301,7 @@ checksum = "8f94766a96b5834eaf72f9cb99a5a45e63fa44f1084705b705d9d31bb6455434" dependencies = [ "bitvec 1.0.0", "dmsort", - "elementtree 1.2.2", + "elementtree", "elsa", "fallible-iterator", "flate2", @@ -3458,32 +3310,19 @@ dependencies = [ "lazy_static", "lazycell", "nom 7.1.0", - "nom-supreme 0.8.0", + "nom-supreme", "parking_lot 0.12.1", "pdb-addr2line", "regex", - "scroll 0.11.0", + "scroll", "serde", "serde_json", 
"smallvec", - "symbolic-common 10.2.1", + "symbolic-common", "symbolic-ppdb", "thiserror", - "wasmparser 0.94.0", - "zip 0.6.3", -] - -[[package]] -name = "symbolic-demangle" -version = "8.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4564ca7b4e6eb14105aa8bbbce26e080f6b5d9c4373e67167ab31f7b86443750" -dependencies = [ - "cc", - "cpp_demangle 0.3.5", - "msvc-demangler", - "rustc-demangle", - "symbolic-common 8.8.0", + "wasmparser", + "zip", ] [[package]] @@ -3496,7 +3335,7 @@ dependencies = [ "cpp_demangle 0.4.0", "msvc-demangler", "rustc-demangle", - "symbolic-common 10.2.1", + "symbolic-common", ] [[package]] @@ -3506,26 +3345,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "125fcd987182e46cd828416a9f2bdb7752c42081b33fa6d80a94afb1fdd4109b" dependencies = [ "indexmap", - "symbolic-common 10.2.1", + "symbolic-common", "thiserror", "uuid 1.2.1", "watto", ] -[[package]] -name = "symbolic-symcache" -version = "8.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9660ab728a9b400c195865453a5b516805cb6e5615e30044c59af6a4f675806b" -dependencies = [ - "dmsort", - "fnv", - "indexmap", - "symbolic-common 8.8.0", - "symbolic-debuginfo 8.8.0", - "thiserror", -] - [[package]] name = "symbolic-symcache" version = "10.2.0" @@ -3533,8 +3358,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "789369a242bacbe89d2f4f6a364f54ea5df1dae774750eb30b335550b315749a" dependencies = [ "indexmap", - "symbolic-common 10.2.1", - "symbolic-debuginfo 10.2.0", + "symbolic-common", + "symbolic-debuginfo", "thiserror", "tracing", "watto", @@ -3912,7 +3737,6 @@ checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7" dependencies = [ "getrandom 0.2.3", "serde", - "winapi", ] [[package]] @@ -4053,12 +3877,6 @@ version = "0.2.78" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"0237232789cf037d5480773fe568aac745bfe2afbc11a863e97901780a6b47cc" -[[package]] -name = "wasmparser" -version = "0.83.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "718ed7c55c2add6548cca3ddd6383d738cd73b892df400e96b9aa876f0141d7a" - [[package]] name = "wasmparser" version = "0.94.0" @@ -4335,18 +4153,6 @@ version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "afa18ba5fbd4933e41ffb440c3fd91f91fe9cdb7310cce3ddfb6648563811de0" -[[package]] -name = "zip" -version = "0.5.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93ab48844d61251bb3835145c521d88aa4031d7139e8485990f60ca911fa0815" -dependencies = [ - "byteorder", - "crc32fast", - "flate2", - "thiserror", -] - [[package]] name = "zip" version = "0.6.3" diff --git a/src/agent/Cargo.toml b/src/agent/Cargo.toml index 62b1474b20..2786340d70 100644 --- a/src/agent/Cargo.toml +++ b/src/agent/Cargo.toml @@ -3,7 +3,6 @@ members = [ "atexit", "cobertura", "coverage", - "coverage-legacy", "debuggable-module", "debugger", "dynamic-library", diff --git a/src/agent/coverage-legacy/Cargo.toml b/src/agent/coverage-legacy/Cargo.toml deleted file mode 100644 index e047be3ead..0000000000 --- a/src/agent/coverage-legacy/Cargo.toml +++ /dev/null @@ -1,46 +0,0 @@ -[package] -name = "coverage-legacy" -version = "0.1.0" -authors = ["fuzzing@microsoft.com"] -license = "MIT" -edition = "2018" - -[features] -default = [] -symbol-filter = [] # Remove after impl'd - -[dependencies] -anyhow = "1.0" -bincode = "1.3" -cpp_demangle = "0.3" -debugger = { path = "../debugger" } -dunce = "1.0" -fixedbitset = "0.4" -goblin = "0.5" -iced-x86 = { version = "1.17", features = ["decoder", "op_code_info", "instr_info", "masm"] } -log = "0.4" -memmap2 = "0.5" -msvc-demangler = "0.9" -regex = "1.6.0" -rustc-demangle = "0.1" -serde = { version = "1.0", features = ["derive"] } -symbolic = { version = "8.8", features = ["debuginfo", "demangle", "symcache"] 
} -uuid = { version = "0.8", features = ["guid"] } -win-util = { path = "../win-util" } -quick-xml = "0.27" - -[target.'cfg(target_os = "windows")'.dependencies] -pdb = "0.7" -winapi = "0.3" - -[target.'cfg(target_os = "linux")'.dependencies] -pete = "0.9" -# For procfs, opt out of the `chrono` freature; it pulls in an old version -# of `time`. We do not use the methods that the `chrono` feature enables. -procfs = { version = "0.12", default-features = false, features=["flate2"] } - -[dev-dependencies] -env_logger = "0.9" -serde_json = { version = "1.0", features = ["preserve_order"] } -structopt = "0.3" -pretty_assertions ="1.3" diff --git a/src/agent/coverage-legacy/examples/coverage.rs b/src/agent/coverage-legacy/examples/coverage.rs deleted file mode 100644 index de0cd53dcb..0000000000 --- a/src/agent/coverage-legacy/examples/coverage.rs +++ /dev/null @@ -1,161 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. - -use std::path::{Path, PathBuf}; -use std::time::Duration; -use std::{process::Command, process::Stdio}; - -use anyhow::Result; -use coverage_legacy::block::CommandBlockCov as Coverage; -use coverage_legacy::cache::ModuleCache; -use coverage_legacy::code::{CmdFilter, CmdFilterDef}; -use structopt::StructOpt; - -#[derive(Debug, PartialEq, Eq, StructOpt)] -struct Opt { - #[structopt(short, long)] - filter: Option, - - #[structopt(short, long, min_values = 1)] - inputs: Vec, - - #[structopt(min_values = 2)] - cmd: Vec, - - #[structopt(short, long, long_help = "Timeout in ms", default_value = "5000")] - timeout: u64, - - #[structopt(long)] - modoff: bool, -} - -impl Opt { - pub fn load_filter_or_default(&self) -> Result { - if let Some(path) = &self.filter { - let data = std::fs::read(path)?; - let def: CmdFilterDef = serde_json::from_slice(&data)?; - CmdFilter::new(def) - } else { - Ok(CmdFilter::default()) - } - } -} - -fn main() -> Result<()> { - let opt = Opt::from_args(); - let filter = opt.load_filter_or_default()?; 
- - env_logger::init(); - - let mut cache = ModuleCache::default(); - let mut total = Coverage::default(); - let timeout = Duration::from_millis(opt.timeout); - - for input in &opt.inputs { - let cmd = input_command(&opt.cmd, input); - let coverage = record(&mut cache, filter.clone(), cmd, timeout)?; - - log::info!("input = {}", input.display()); - if !opt.modoff { - print_stats(&coverage); - } - - total.merge_max(&coverage); - } - - if opt.modoff { - print_modoff(&total); - } else { - print_stats(&total); - } - - Ok(()) -} - -fn input_command(argv: &[String], input: &Path) -> Command { - let mut cmd = Command::new(&argv[0]); - cmd.stdin(Stdio::null()); - cmd.stderr(Stdio::null()); - cmd.stdout(Stdio::null()); - - let args: Vec<_> = argv[1..] - .iter() - .map(|a| { - if a == "@@" { - input.display().to_string() - } else { - a.to_string() - } - }) - .collect(); - - cmd.args(&args); - - cmd -} - -#[cfg(target_os = "linux")] -fn record( - cache: &mut ModuleCache, - filter: CmdFilter, - cmd: Command, - timeout: Duration, -) -> Result { - use coverage_legacy::block::linux::Recorder; - - let now = std::time::Instant::now(); - - let coverage = Recorder::record(cmd, timeout, cache, filter)?; - - let elapsed = now.elapsed(); - log::info!("recorded in {:?}", elapsed); - - Ok(coverage) -} - -#[cfg(target_os = "windows")] -fn record( - cache: &mut ModuleCache, - filter: CmdFilter, - cmd: Command, - timeout: Duration, -) -> Result { - use coverage_legacy::block::windows::{Recorder, RecorderEventHandler}; - - let mut recorder = Recorder::new(cache, filter); - let mut handler = RecorderEventHandler::new(&mut recorder, timeout); - - let now = std::time::Instant::now(); - - handler.run(cmd)?; - - let elapsed = now.elapsed(); - log::info!("recorded in {:?}", elapsed); - - Ok(recorder.into_coverage()) -} - -fn print_stats(coverage: &Coverage) { - for (m, c) in coverage.iter() { - let covered = c.covered_blocks(); - let known = c.known_blocks(); - let percent = 100.0 * (covered as 
f64) / (known as f64); - log::info!( - "{} = {} / {} ({:.2}%)", - m.name_lossy(), - covered, - known, - percent - ); - } -} - -fn print_modoff(coverage: &Coverage) { - for (m, c) in coverage.iter() { - for b in c.blocks.values() { - if b.count > 0 { - println!("{}+{:x}", m.name_lossy(), b.offset); - } - } - } -} diff --git a/src/agent/coverage-legacy/examples/elf_sancov_tables.rs b/src/agent/coverage-legacy/examples/elf_sancov_tables.rs deleted file mode 100644 index 40693699ba..0000000000 --- a/src/agent/coverage-legacy/examples/elf_sancov_tables.rs +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. - -use anyhow::Result; -use structopt::StructOpt; - -#[derive(Debug, PartialEq, Eq, StructOpt)] -struct Opt { - #[structopt(short, long)] - elf: std::path::PathBuf, - - #[structopt(short, long)] - pcs: bool, - - #[structopt(short, long)] - inline: bool, -} - -#[cfg(target_os = "windows")] -fn main() -> Result<()> { - Ok(()) -} - -#[cfg(target_os = "linux")] -fn main() -> Result<()> { - use coverage_legacy::elf::{ElfContext, ElfSancovBasicBlockProvider}; - use goblin::elf::Elf; - - let opt = Opt::from_args(); - - let data = std::fs::read(opt.elf)?; - let elf = Elf::parse(&data)?; - let ctx = ElfContext::new(&data, &elf)?; - let mut provider = ElfSancovBasicBlockProvider::new(ctx); - - provider.set_check_pc_table(opt.pcs); - - let blocks = provider.provide()?; - - println!("block count = {}", blocks.len()); - println!("blocks = {blocks:x?}"); - - Ok(()) -} diff --git a/src/agent/coverage-legacy/examples/pdb_sancov_tables.rs b/src/agent/coverage-legacy/examples/pdb_sancov_tables.rs deleted file mode 100644 index 86284530c7..0000000000 --- a/src/agent/coverage-legacy/examples/pdb_sancov_tables.rs +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. 
- -use anyhow::Result; -use structopt::StructOpt; - -#[derive(Debug, PartialEq, Eq, StructOpt)] -struct Opt { - #[structopt(long)] - pe: std::path::PathBuf, - - #[structopt(long)] - pdb: Option, -} - -#[cfg(target_os = "windows")] -fn main() -> Result<()> { - use coverage_legacy::block::pe_provider::PeSancovBasicBlockProvider; - use goblin::pe::PE; - use pdb::PDB; - - let opt = Opt::from_args(); - - let data = std::fs::read(&opt.pe)?; - let pe = PE::parse(&data)?; - - let pdb = opt - .pdb - .clone() - .unwrap_or_else(|| opt.pe.with_extension("pdb")); - let pdb = std::fs::File::open(pdb)?; - let mut pdb = PDB::open(pdb)?; - - let mut provider = PeSancovBasicBlockProvider::new(&data, &pe, &mut pdb); - let blocks = provider.provide()?; - - println!("blocks = {blocks:x?}"); - - Ok(()) -} - -#[cfg(target_os = "linux")] -fn main() -> Result<()> { - Ok(()) -} diff --git a/src/agent/coverage-legacy/examples/src-cov.rs b/src/agent/coverage-legacy/examples/src-cov.rs deleted file mode 100644 index a295dba9e0..0000000000 --- a/src/agent/coverage-legacy/examples/src-cov.rs +++ /dev/null @@ -1,143 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. 
- -use std::path::{Path, PathBuf}; -use std::time::Duration; -use std::{process::Command, process::Stdio}; - -use anyhow::Result; -use coverage_legacy::block::CommandBlockCov as Coverage; -use coverage_legacy::cache::ModuleCache; -use coverage_legacy::code::CmdFilter; -use structopt::StructOpt; - -#[derive(Debug, PartialEq, Eq, StructOpt)] -struct Opt { - #[structopt(short, long, min_values = 1)] - inputs: Vec, - - #[structopt(short, long)] - dir: Option, - - #[structopt(min_values = 2)] - cmd: Vec, - - #[structopt(short, long, long_help = "Timeout in ms", default_value = "120000")] - timeout: u64, - - #[structopt(short = "x", long)] - cobertura_xml: bool, -} - -fn main() -> Result<()> { - let opt = Opt::from_args(); - let filter = CmdFilter::default(); - - let mut cache = ModuleCache::default(); - let mut total = Coverage::default(); - let timeout = Duration::from_millis(opt.timeout); - - if let Some(dir) = &opt.dir { - for entry in std::fs::read_dir(dir)? { - let input = entry?.path(); - - eprintln!("testing input: {}", input.display()); - - let cmd = input_command(&opt.cmd, &input); - let coverage = record(&mut cache, filter.clone(), cmd, timeout)?; - - total.merge_max(&coverage); - } - } - - for input in &opt.inputs { - eprintln!("testing input: {}", input.display()); - - let cmd = input_command(&opt.cmd, input); - let coverage = record(&mut cache, filter.clone(), cmd, timeout)?; - - total.merge_max(&coverage); - } - - let mut debug_info = coverage_legacy::debuginfo::DebugInfo::default(); - let src_coverage = total.source_coverage(&mut debug_info)?; - - if opt.cobertura_xml { - let cobertura = coverage_legacy::cobertura::cobertura(src_coverage)?; - println!("{cobertura}"); - } else { - for file_coverage in src_coverage.files { - for location in &file_coverage.locations { - println!( - "{} {}:{}", - location.count, file_coverage.file, location.line - ); - } - } - } - - Ok(()) -} - -fn input_command(argv: &[String], input: &Path) -> Command { - let mut cmd = 
Command::new(&argv[0]); - cmd.stdin(Stdio::null()); - cmd.stderr(Stdio::null()); - cmd.stdout(Stdio::null()); - - let args: Vec<_> = argv[1..] - .iter() - .map(|a| { - if a == "@@" { - input.display().to_string() - } else { - a.to_string() - } - }) - .collect(); - - cmd.args(&args); - - cmd -} - -#[cfg(target_os = "linux")] -fn record( - cache: &mut ModuleCache, - filter: CmdFilter, - cmd: Command, - timeout: Duration, -) -> Result { - use coverage_legacy::block::linux::Recorder; - - let now = std::time::Instant::now(); - - let coverage = Recorder::record(cmd, timeout, cache, filter)?; - - let elapsed = now.elapsed(); - log::info!("recorded in {:?}", elapsed); - - Ok(coverage) -} - -#[cfg(target_os = "windows")] -fn record( - cache: &mut ModuleCache, - filter: CmdFilter, - cmd: Command, - timeout: Duration, -) -> Result { - use coverage_legacy::block::windows::{Recorder, RecorderEventHandler}; - - let mut recorder = Recorder::new(cache, filter); - let mut handler = RecorderEventHandler::new(&mut recorder, timeout); - - let now = std::time::Instant::now(); - - handler.run(cmd)?; - - let elapsed = now.elapsed(); - log::info!("recorded in {:?}", elapsed); - - Ok(recorder.into_coverage()) -} diff --git a/src/agent/coverage-legacy/src/block.rs b/src/agent/coverage-legacy/src/block.rs deleted file mode 100644 index 8282a152d2..0000000000 --- a/src/agent/coverage-legacy/src/block.rs +++ /dev/null @@ -1,642 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. 
- -#[cfg(target_os = "linux")] -pub mod linux; - -#[cfg(target_os = "windows")] -pub mod pe_provider; - -#[cfg(target_os = "windows")] -pub mod windows; - -use std::collections::{btree_map, BTreeMap}; -use std::convert::TryFrom; - -use anyhow::Result; -use serde::{Deserialize, Serialize}; - -use crate::code::ModulePath; -use crate::debuginfo::DebugInfo; -use crate::report::{CoverageReport, CoverageReportEntry}; -use crate::source::SourceCoverage; - -/// Block coverage for a command invocation. -/// -/// Organized by module. -#[derive(Clone, Debug, Default, Deserialize, PartialEq, Eq, Serialize)] -#[serde(into = "BlockCoverageReport", try_from = "BlockCoverageReport")] -pub struct CommandBlockCov { - modules: BTreeMap, -} - -impl CommandBlockCov { - /// Returns `true` if the module was newly-inserted (which initializes its - /// block coverage map). Otherwise, returns `false`, and no re-computation - /// is performed. - pub fn insert(&mut self, path: &ModulePath, offsets: impl IntoIterator) -> bool { - use std::collections::btree_map::Entry; - - match self.modules.entry(path.clone()) { - Entry::Occupied(_entry) => false, - Entry::Vacant(entry) => { - entry.insert(ModuleCov::new(offsets)); - true - } - } - } - - pub fn increment(&mut self, path: &ModulePath, offset: u32) { - let entry = self.modules.entry(path.clone()); - - if let btree_map::Entry::Vacant(_) = entry { - log::debug!( - "initializing missing module when incrementing coverage at {}+{:x}", - path, - offset - ); - } - - let module = entry.or_default(); - module.increment(offset); - } - - pub fn iter(&self) -> impl Iterator { - self.modules.iter() - } - - /// Total count of covered blocks across all modules. - pub fn covered_blocks(&self) -> u64 { - self.modules.values().map(|m| m.covered_blocks()).sum() - } - - /// Total count of known blocks across all modules. 
- pub fn known_blocks(&self) -> u64 { - self.modules.values().map(|m| m.known_blocks()).sum() - } - - pub fn merge_max(&mut self, other: &Self) { - for (module, cov) in other.iter() { - let entry = self.modules.entry(module.clone()).or_default(); - entry.merge_max(cov); - } - } - - /// Total count of blocks covered by modules in `self` but not `other`. - /// - /// Counts modules absent in `self`. - pub fn difference(&self, other: &Self) -> u64 { - let mut total = 0; - - for (module, cov) in &self.modules { - if let Some(other_cov) = other.modules.get(module) { - total += cov.difference(other_cov); - } else { - total += cov.covered_blocks(); - } - } - - total - } - - pub fn into_report(self) -> BlockCoverageReport { - self.into() - } - - pub fn try_from_report(report: BlockCoverageReport) -> Result { - Self::try_from(report) - } - - /// Translate binary block coverage to source line coverage, using a caching - /// debug info provider. - pub fn source_coverage(&self, debuginfo: &mut DebugInfo) -> Result { - use crate::source::{SourceCoverageLocation as Location, *}; - use std::collections::HashMap; - - // Temporary map to collect line coverage results without duplication. - // Will be converted after processing block coverage. - // - // Maps: source_file_path -> (line -> count) - let mut files: HashMap> = HashMap::default(); - - for (module, coverage) in &self.modules { - let loaded = debuginfo.load_module(module.path().to_owned())?; - - if !loaded { - continue; - } - - let mod_info = debuginfo.get(module.path()); - - if let Some(mod_info) = mod_info { - for (offset, block) in &coverage.blocks { - let lines = mod_info.source.lookup(u64::from(*offset))?; - - for line_info in lines { - let line_info = line_info?; - let file = line_info.path().to_owned(); - let line = line_info.line(); - - let file_entry = files.entry(file).or_default(); - let line_entry = file_entry.entry(line).or_insert(0); - - // Will always be 0 or 1. 
- *line_entry = u32::max(*line_entry, block.count); - } - } - } - } - - let mut src = SourceCoverage::default(); - - for (file, lines) in files { - let mut locations = vec![]; - - for (line, count) in lines { - // Valid lines are always 1-indexed. - if line > 0 { - let location = Location::new(line, None, count)?; - locations.push(location) - } - } - - locations.sort_unstable_by_key(|l| l.line); - - let file_coverage = SourceFileCoverage { file, locations }; - src.files.push(file_coverage); - } - - src.files.sort_unstable_by_key(|f| f.file.clone()); - - Ok(src) - } -} - -impl From for BlockCoverageReport { - fn from(cmd: CommandBlockCov) -> Self { - let mut report = CoverageReport::default(); - - for (module, blocks) in cmd.modules { - let entry = CoverageReportEntry { - module: module.path_lossy(), - metadata: (), - coverage: Block { blocks }, - }; - report.entries.push(entry); - } - - report - } -} - -impl TryFrom> for CommandBlockCov { - type Error = anyhow::Error; - - fn try_from(report: BlockCoverageReport) -> Result { - let mut coverage = Self::default(); - - for entry in report.entries { - let path = ModulePath::new(entry.module.into())?; - let blocks = entry.coverage.blocks.blocks; - let cov = ModuleCov { blocks }; - - coverage.modules.insert(path, cov); - } - - Ok(coverage) - } -} - -pub type BlockCoverageReport = CoverageReport; - -#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)] -pub struct Block { - pub blocks: ModuleCov, -} - -#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)] -#[serde(transparent)] -pub struct ModuleCov { - #[serde(with = "array")] - pub blocks: BTreeMap, -} - -impl ModuleCov { - pub fn new(offsets: impl IntoIterator) -> Self { - let blocks = offsets.into_iter().map(|o| (o, BlockCov::new(o))).collect(); - Self { blocks } - } - - /// Total count of blocks that have been reached (have a positive count). 
- pub fn covered_blocks(&self) -> u64 { - self.blocks.values().filter(|b| b.count > 0).count() as u64 - } - - /// Total count of known blocks. - pub fn known_blocks(&self) -> u64 { - self.blocks.len() as u64 - } - - /// Total count of blocks covered by `self` but not `other`. - /// - /// A difference of 0 does not imply identical coverage, and a positive - /// difference does not imply that `self` covers every block in `other`. - pub fn difference(&self, other: &Self) -> u64 { - let mut total = 0; - - for (offset, block) in &self.blocks { - if let Some(other_block) = other.blocks.get(offset) { - if other_block.count == 0 { - total += u64::min(1, block.count as u64); - } - } else { - total += u64::min(1, block.count as u64); - } - } - - total - } - - pub fn increment(&mut self, offset: u32) { - let block = self - .blocks - .entry(offset) - .or_insert_with(|| BlockCov::new(offset)); - block.count = block.count.saturating_add(1); - } - - pub fn merge_max(&mut self, other: &Self) { - for block in other.blocks.values() { - let entry = self - .blocks - .entry(block.offset) - .or_insert_with(|| BlockCov::new(block.offset)); - entry.count = u32::max(entry.count, block.count); - } - } -} - -/// Coverage info for a specific block, identified by its offset. -#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)] -pub struct BlockCov { - /// Offset of the block, relative to the module base load address. - // - // These offsets come from well-formed executable modules, so we assume they - // can be represented as `u32` values and losslessly serialized to an `f64`. - // - // If we need to handle malformed binaries or arbitrary addresses, then this - // will need revision. - pub offset: u32, - - /// Number of times a block was seen to be executed, relative to some input - /// or corpus. - /// - /// Right now, we only set one-shot breakpoints, so the max `count` for a - /// single input is 1. 
In this usage, if we measure corpus block coverage - /// with `sum()` as the aggregation function, then `count` / `corpus.len()` - /// tells us the proportion of corpus inputs that cover a block. - /// - /// If we reset breakpoints and recorded multiple block hits per input, then - /// the corpus semantics would depend on the aggregation function. - pub count: u32, -} - -impl BlockCov { - pub fn new(offset: u32) -> Self { - Self { offset, count: 0 } - } -} - -mod array { - use std::collections::BTreeMap; - use std::fmt; - - use serde::de::{self, Deserializer, Visitor}; - use serde::ser::{SerializeSeq, Serializer}; - - use super::BlockCov; - - type BlockCovMap = BTreeMap; - - pub fn serialize(data: &BlockCovMap, ser: S) -> Result - where - S: Serializer, - { - let mut seq = ser.serialize_seq(Some(data.len()))?; - for v in data.values() { - seq.serialize_element(v)?; - } - seq.end() - } - - pub fn deserialize<'d, D>(de: D) -> Result - where - D: Deserializer<'d>, - { - de.deserialize_seq(FlattenVisitor) - } - - struct FlattenVisitor; - - impl<'d> Visitor<'d> for FlattenVisitor { - type Value = BlockCovMap; - - fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "array of blocks") - } - - fn visit_seq(self, mut seq: A) -> Result - where - A: de::SeqAccess<'d>, - { - let mut map = Self::Value::default(); - - while let Some(block) = seq.next_element::()? { - map.insert(block.offset, block); - } - - Ok(map) - } - } -} - -#[cfg(test)] -mod tests { - use anyhow::Result; - use serde_json::json; - - use crate::test::module_path; - - use super::*; - - // Builds a `ModuleCov` from a vec of `(offset, count)` tuples. - fn from_vec(data: Vec<(u32, u32)>) -> ModuleCov { - let offsets = data.iter().map(|(o, _)| *o); - let mut cov = ModuleCov::new(offsets); - for (offset, count) in data { - for _ in 0..count { - cov.increment(offset); - } - } - cov - } - - // Builds a vec of `(offset, count)` tuples from a `ModuleCov`. 
- fn to_vec(cov: &ModuleCov) -> Vec<(u32, u32)> { - cov.blocks.iter().map(|(o, b)| (*o, b.count)).collect() - } - - #[test] - fn test_module_merge_max() { - let initial = vec![(2, 0), (3, 0), (5, 0), (8, 0)]; - - // Start out with known offsets and no hits. - let mut total = from_vec(initial.clone()); - assert_eq!(to_vec(&total), vec![(2, 0), (3, 0), (5, 0), (8, 0),]); - - // If we merge data that is missing offsets, nothing happens. - let empty = from_vec(vec![]); - total.merge_max(&empty); - assert_eq!(to_vec(&total), vec![(2, 0), (3, 0), (5, 0), (8, 0),]); - - // Merging some known hits updates the total. - let hit_3_8 = from_vec(vec![(2, 0), (3, 1), (5, 0), (8, 1)]); - total.merge_max(&hit_3_8); - assert_eq!(to_vec(&total), vec![(2, 0), (3, 1), (5, 0), (8, 1),]); - - // Merging the same known hits again is idempotent. - total.merge_max(&hit_3_8); - assert_eq!(to_vec(&total), vec![(2, 0), (3, 1), (5, 0), (8, 1),]); - - // Monotonic: merging missed known offsets doesn't lose existing. - let empty = from_vec(initial); - total.merge_max(&empty); - assert_eq!(to_vec(&total), vec![(2, 0), (3, 1), (5, 0), (8, 1),]); - - // Monotonic: merging some known hit, some misses doesn't lose existing. - let hit_3 = from_vec(vec![(2, 0), (3, 1), (5, 0), (8, 0)]); - total.merge_max(&hit_3); - assert_eq!(to_vec(&total), vec![(2, 0), (3, 1), (5, 0), (8, 1),]); - - // Newly-discovered offsets are merged. 
- let extra = from_vec(vec![ - (1, 0), // New, not hit - (2, 0), - (3, 1), - (5, 0), - (8, 1), - (13, 1), // New, was hit - ]); - total.merge_max(&extra); - assert_eq!( - to_vec(&total), - vec![(1, 0), (2, 0), (3, 1), (5, 0), (8, 1), (13, 1),] - ); - } - - fn cmd_cov_from_vec(data: Vec<(&ModulePath, Vec<(u32, u32)>)>) -> CommandBlockCov { - let mut cov = CommandBlockCov::default(); - - for (path, module_data) in data { - let module_cov = from_vec(module_data); - cov.modules.insert(path.clone(), module_cov); - } - - cov - } - - #[test] - fn test_cmd_cov_increment() -> Result<()> { - let main_exe = module_path("/onefuzz/main.exe")?; - let some_dll = module_path("/common/some.dll")?; - - let mut coverage = CommandBlockCov::default(); - - // Normal initialization, assuming disassembly of module. - coverage.insert(&main_exe, vec![1, 20, 300].into_iter()); - coverage.increment(&main_exe, 20); - - // On-demand module initialization, using only observed offsets. - coverage.increment(&some_dll, 123); - coverage.increment(&some_dll, 456); - coverage.increment(&some_dll, 789); - - let expected = cmd_cov_from_vec(vec![ - (&main_exe, vec![(1, 0), (20, 1), (300, 0)]), - (&some_dll, vec![(123, 1), (456, 1), (789, 1)]), - ]); - - assert_eq!(coverage, expected); - - Ok(()) - } - - #[test] - fn test_cmd_cov_merge_max() -> Result<()> { - let main_exe = module_path("/onefuzz/main.exe")?; - let known_dll = module_path("/common/known.dll")?; - let unknown_dll = module_path("/other/unknown.dll")?; - - let mut total = cmd_cov_from_vec(vec![ - (&main_exe, vec![(2, 0), (40, 1), (600, 0), (8000, 1)]), - (&known_dll, vec![(1, 1), (30, 1), (500, 0), (7000, 0)]), - ]); - - let new = cmd_cov_from_vec(vec![ - (&main_exe, vec![(2, 1), (40, 0), (600, 0), (8000, 0)]), - (&known_dll, vec![(1, 0), (30, 0), (500, 1), (7000, 1)]), - (&unknown_dll, vec![(123, 0), (456, 1)]), - ]); - - total.merge_max(&new); - - let expected = cmd_cov_from_vec(vec![ - (&main_exe, vec![(2, 1), (40, 1), (600, 0), (8000, 
1)]), - (&known_dll, vec![(1, 1), (30, 1), (500, 1), (7000, 1)]), - (&unknown_dll, vec![(123, 0), (456, 1)]), - ]); - - assert_eq!(total, expected); - - Ok(()) - } - - #[test] - fn test_block_cov_serde() -> Result<()> { - let block = BlockCov { - offset: 123, - count: 456, - }; - - let ser = serde_json::to_string(&block)?; - - let text = r#"{"offset":123,"count":456}"#; - - assert_eq!(ser, text); - - let de: BlockCov = serde_json::from_str(&ser)?; - - assert_eq!(de, block); - - Ok(()) - } - - #[test] - fn test_cmd_cov_serde() -> Result<()> { - let main_exe = module_path("/onefuzz/main.exe")?; - let some_dll = module_path("/common/some.dll")?; - - let cov = { - let mut cov = CommandBlockCov::default(); - cov.insert(&main_exe, vec![1, 20, 300].into_iter()); - cov.increment(&main_exe, 1); - cov.increment(&main_exe, 300); - cov.insert(&some_dll, vec![2, 30, 400].into_iter()); - cov.increment(&some_dll, 30); - cov - }; - - let ser = serde_json::to_string(&cov)?; - - let text = serde_json::to_string(&json!([ - { - "module": some_dll, - "blocks": [ - { "offset": 2, "count": 0 }, - { "offset": 30, "count": 1 }, - { "offset": 400, "count": 0 }, - ], - }, - { - "module": main_exe, - "blocks": [ - { "offset": 1, "count": 1 }, - { "offset": 20, "count": 0 }, - { "offset": 300, "count": 1 }, - ], - }, - ]))?; - - assert_eq!(ser, text); - - let de: CommandBlockCov = serde_json::from_str(&ser)?; - assert_eq!(de, cov); - - Ok(()) - } - - #[test] - fn test_cmd_cov_stats() -> Result<()> { - let main_exe = module_path("/onefuzz/main.exe")?; - let some_dll = module_path("/common/some.dll")?; - let other_dll = module_path("/common/other.dll")?; - - let empty = CommandBlockCov::default(); - - let mut total: CommandBlockCov = serde_json::from_value(json!([ - { - "module": some_dll, - "blocks": [ - { "offset": 2, "count": 0 }, - { "offset": 30, "count": 1 }, - { "offset": 400, "count": 0 }, - ], - }, - { - "module": main_exe, - "blocks": [ - { "offset": 1, "count": 2 }, - { "offset": 20, 
"count": 0 }, - { "offset": 300, "count": 3 }, - ], - }, - ]))?; - - assert_eq!(total.known_blocks(), 6); - assert_eq!(total.covered_blocks(), 3); - assert_eq!(total.covered_blocks(), total.difference(&empty)); - assert_eq!(total.difference(&total), 0); - - let new: CommandBlockCov = serde_json::from_value(json!([ - { - "module": some_dll, - "blocks": [ - { "offset": 2, "count": 0 }, - { "offset": 22, "count": 4 }, - { "offset": 30, "count": 5 }, - { "offset": 400, "count": 6 }, - ], - }, - { - "module": main_exe, - "blocks": [ - { "offset": 1, "count": 0 }, - { "offset": 300, "count": 1 }, - { "offset": 5000, "count": 0 }, - ], - }, - { - "module": other_dll, - "blocks": [ - { "offset": 123, "count": 0 }, - { "offset": 456, "count": 10 }, - ], - }, - ]))?; - - assert_eq!(new.known_blocks(), 9); - assert_eq!(new.covered_blocks(), 5); - assert_eq!(new.covered_blocks(), new.difference(&empty)); - assert_eq!(new.difference(&new), 0); - - assert_eq!(new.difference(&total), 3); - assert_eq!(total.difference(&new), 1); - - total.merge_max(&new); - - assert_eq!(total.known_blocks(), 10); - assert_eq!(total.covered_blocks(), 6); - - Ok(()) - } -} diff --git a/src/agent/coverage-legacy/src/block/linux.rs b/src/agent/coverage-legacy/src/block/linux.rs deleted file mode 100644 index 52dc83071d..0000000000 --- a/src/agent/coverage-legacy/src/block/linux.rs +++ /dev/null @@ -1,495 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. 
- -use std::collections::BTreeMap; -use std::convert::TryInto; -use std::ffi::OsStr; -use std::process::Command; -use std::sync::atomic::{AtomicBool, Ordering}; -use std::sync::{mpsc, Arc}; -use std::thread; -use std::time::{Duration, Instant}; - -use anyhow::{format_err, Context, Result}; -use pete::{Ptracer, Restart, Signal, Stop, Tracee}; -use procfs::process::{MMapPath, MemoryMap, Process}; - -use crate::block::CommandBlockCov; -use crate::cache::ModuleCache; -use crate::code::{CmdFilter, ModulePath}; -use crate::demangle::Demangler; -use crate::region::Region; - -#[derive(Debug)] -pub struct Recorder<'c> { - breakpoints: Breakpoints, - cache: &'c mut ModuleCache, - coverage: CommandBlockCov, - demangler: Demangler, - filter: CmdFilter, - images: Option, - tracer: Ptracer, -} - -impl<'c> Recorder<'c> { - pub fn record( - cmd: Command, - timeout: Duration, - cache: &'c mut ModuleCache, - filter: CmdFilter, - ) -> Result { - let mut tracer = Ptracer::new(); - let mut child = tracer.spawn(cmd)?; - - let timer = Timer::new(timeout, move || child.kill()); - - let recorder = Recorder { - breakpoints: Breakpoints::default(), - cache, - coverage: CommandBlockCov::default(), - demangler: Demangler::default(), - filter, - images: None, - tracer, - }; - - let coverage = recorder.wait()?; - - if timer.timed_out() { - Err(anyhow::format_err!( - "timed out creating recording after {}s", - timeout.as_secs_f64() - )) - } else { - Ok(coverage) - } - } - - fn wait(mut self) -> Result { - use pete::ptracer::Options; - - // Continue the tracee process until the return from its initial `execve()`. - let mut tracee = continue_to_init_execve(&mut self.tracer)?; - - // Do not follow forks. - // - // After this, we assume that any new tracee is a thread in the same - // group as the root tracee. 
- let mut options = Options::all(); - options.remove(Options::PTRACE_O_TRACEFORK); - options.remove(Options::PTRACE_O_TRACEVFORK); - options.remove(Options::PTRACE_O_TRACEEXEC); - tracee - .set_options(options) - .context("setting tracee options")?; - - self.images = Some(Images::new(tracee.pid.as_raw())); - self.update_images(&mut tracee) - .context("initial update of module images")?; - - self.tracer - .restart(tracee, Restart::Syscall) - .context("initial tracer restart")?; - - while let Some(mut tracee) = self.tracer.wait().context("main tracing loop")? { - match tracee.stop { - Stop::SyscallEnter => log::trace!("syscall-enter: {:?}", tracee.stop), - Stop::SyscallExit => { - self.update_images(&mut tracee) - .context("updating module images after syscall-stop")?; - } - Stop::SignalDelivery { - signal: Signal::SIGTRAP, - } => { - self.on_breakpoint(&mut tracee) - .context("calling breakpoint handler")?; - } - Stop::Clone { new: pid } => { - // Only seen when the `VM_CLONE` flag is set, as of Linux 4.15. - log::info!("new thread: {}", pid); - } - _ => { - log::debug!("stop: {:?}", tracee.stop); - } - } - - if let Err(err) = self.tracer.restart(tracee, Restart::Syscall) { - log::error!("unable to restart tracee: {}", err); - } - } - - Ok(self.coverage) - } - - fn update_images(&mut self, tracee: &mut Tracee) -> Result<()> { - let images = self - .images - .as_mut() - .ok_or_else(|| format_err!("internal error: recorder images not initialized"))?; - let events = images.update()?; - - for (_base, image) in &events.loaded { - if self.filter.includes_module(image.path()) { - self.on_module_load(tracee, image) - .context("module load callback")?; - } - } - - Ok(()) - } - - fn on_breakpoint(&mut self, tracee: &mut Tracee) -> Result<()> { - let mut regs = tracee.registers()?; - - // Adjust for synthetic `int3`. - let pc = regs.rip - 1; - - log::trace!("hit breakpoint: pc = {:x}, pid = {}", pc, tracee.pid); - - if self.breakpoints.clear(tracee, pc)? 
{ - let images = self - .images - .as_ref() - .ok_or_else(|| format_err!("internal error: recorder images not initialized"))?; - let image = images - .find_va_image(pc) - .ok_or_else(|| format_err!("unable to find image for va = {:x}", pc))?; - - let offset = image - .va_to_offset(pc) - .context("converting PC to module offset")?; - self.coverage.increment(image.path(), offset); - - // Execute clobbered instruction on restart. - regs.rip = pc; - tracee - .set_registers(regs) - .context("resetting PC in breakpoint handler")?; - } else { - // Assume the tracee concurrently executed an `int3` that we restored - // in another handler. - // - // We could improve on this by not removing breakpoints metadata when - // clearing, but making their value a state. - log::debug!("no breakpoint at {:x}, assuming race", pc); - regs.rip = pc; - tracee - .set_registers(regs) - .context("resetting PC after ignoring spurious breakpoint")?; - } - - Ok(()) - } - - fn on_module_load(&mut self, tracee: &mut Tracee, image: &ModuleImage) -> Result<()> { - log::info!("module load: {}", image.path()); - - // Fetch disassembled module info via cache. - let info = self - .cache - .fetch(image.path())? - .ok_or_else(|| format_err!("unable to fetch info for module: {}", image.path()))?; - - // Collect blocks allowed by the symbol filter. - let mut allowed_blocks = vec![]; - - for symbol in info.module.symbols.iter() { - // Try to demangle the symbol name for filtering. If no demangling - // is found, fall back to the raw name. - let symbol_name = self - .demangler - .demangle(&symbol.name) - .unwrap_or_else(|| symbol.name.clone()); - - // Check the maybe-demangled against the coverage filter. - if self.filter.includes_symbol(&info.module.path, symbol_name) { - // Convert range bounds to an `offset`-sized type. 
- let range = { - let range = symbol.range(); - let lo: u32 = range.start.try_into()?; - let hi: u32 = range.end.try_into()?; - lo..hi - }; - - for offset in info.blocks.range(range) { - allowed_blocks.push(*offset); - } - } - } - - // Initialize module coverage info. - let new = self - .coverage - .insert(image.path(), allowed_blocks.iter().copied()); - - // If module coverage is already initialized, we're done. - if !new { - return Ok(()); - } - - // Set breakpoints by module block entry points. - for offset in &allowed_blocks { - let va = image.offset_to_va(*offset); - self.breakpoints.set(tracee, va)?; - log::trace!("set breakpoint, va = {:x}, pid = {}", va, tracee.pid); - } - - Ok(()) - } -} - -/// Executable memory-mapped files for a process. -#[derive(Clone, Debug, PartialEq, Eq)] -pub struct Images { - mapped: BTreeMap, - pid: i32, -} - -impl Images { - pub fn new(pid: i32) -> Self { - let mapped = BTreeMap::default(); - - Self { mapped, pid } - } - - pub fn mapped(&self) -> impl Iterator { - self.mapped.iter().map(|(va, i)| (*va, i)) - } - - pub fn update(&mut self) -> Result { - let proc = Process::new(self.pid).context("getting procinfo")?; - - let mut new = BTreeMap::default(); - - for map in proc.maps().context("getting maps for process")? { - if let Ok(image) = ModuleImage::new(map) { - new.insert(image.base(), image); - } - } - - let events = LoadEvents::new(&self.mapped, &new); - - self.mapped = new; - - Ok(events) - } - - pub fn find_va_image(&self, va: u64) -> Option<&ModuleImage> { - for (base, image) in self.mapped() { - if va < base { - continue; - } - - if image.region().contains(&va) { - return Some(image); - } - } - - None - } -} - -/// A `MemoryMap` that is known to be file-backed and executable. 
-#[derive(Clone, Debug, PartialEq, Eq)] -pub struct ModuleImage { - map: MemoryMap, - path: ModulePath, -} - -impl ModuleImage { - pub fn new(map: MemoryMap) -> Result { - if let MMapPath::Path(path) = &map.pathname { - if map.perms.contains('x') { - // Copy the path into a wrapper type that encodes extra guarantees. - let path = ModulePath::new(path.clone())?; - - Ok(ModuleImage { map, path }) - } else { - anyhow::bail!("memory mapping is not executable"); - } - } else { - anyhow::bail!("memory mapping is not file-backed"); - } - } - - pub fn name(&self) -> &OsStr { - self.path.name() - } - - pub fn path(&self) -> &ModulePath { - &self.path - } - - pub fn map(&self) -> &MemoryMap { - &self.map - } - - pub fn base(&self) -> u64 { - self.map.address.0 - self.map.offset - } - - pub fn size(&self) -> u64 { - self.map.address.1 - self.map.address.0 - } - - pub fn region(&self) -> std::ops::Range { - (self.map.address.0)..(self.map.address.1) - } - - pub fn va_to_offset(&self, va: u64) -> Result { - if let Some(offset) = va.checked_sub(self.base()) { - Ok(offset.try_into().context("ELF offset overflowed `u32`")?) - } else { - anyhow::bail!("underflow converting VA to image offset") - } - } - - pub fn offset_to_va(&self, offset: u32) -> u64 { - self.base() + (offset as u64) - } -} - -pub struct LoadEvents { - pub loaded: Vec<(u64, ModuleImage)>, - pub unloaded: Vec<(u64, ModuleImage)>, -} - -impl LoadEvents { - pub fn new(old: &BTreeMap, new: &BTreeMap) -> Self { - // New not in old. - let loaded: Vec<_> = new - .iter() - .filter(|(nva, n)| { - !old.iter() - .any(|(iva, i)| *nva == iva && n.path() == i.path()) - }) - .map(|(va, i)| (*va, i.clone())) - .collect(); - - // Old not in new. 
- let unloaded: Vec<_> = old - .iter() - .filter(|(iva, i)| { - !new.iter() - .any(|(nva, n)| nva == *iva && n.path() == i.path()) - }) - .map(|(va, i)| (*va, i.clone())) - .collect(); - - Self { loaded, unloaded } - } -} - -#[derive(Clone, Debug, Default)] -pub struct Breakpoints { - saved: BTreeMap, -} - -impl Breakpoints { - pub fn set(&mut self, tracee: &mut Tracee, va: u64) -> Result<()> { - // Return if the breakpoint exists. We don't want to conclude that the - // saved instruction byte was `0xcc`. - if self.saved.contains_key(&va) { - return Ok(()); - } - - let mut data = [0u8]; - tracee.read_memory_mut(va, &mut data)?; - self.saved.insert(va, data[0]); - tracee - .write_memory(va, &[0xcc]) - .context("setting breakpoint, writing int3")?; - - Ok(()) - } - - pub fn clear(&mut self, tracee: &mut Tracee, va: u64) -> Result { - let data = self.saved.remove(&va); - - let cleared = if let Some(data) = data { - tracee - .write_memory(va, &[data]) - .context("clearing breakpoint, restoring byte")?; - true - } else { - false - }; - - Ok(cleared) - } -} - -fn continue_to_init_execve(tracer: &mut Ptracer) -> Result { - while let Some(tracee) = tracer.wait()? 
{ - if let Stop::SyscallExit = &tracee.stop { - return Ok(tracee); - } - - tracer - .restart(tracee, Restart::Continue) - .context("restarting tracee pre-execve()")?; - } - - anyhow::bail!("did not see initial execve() in tracee while recording coverage"); -} - -const MAX_POLL_PERIOD: Duration = Duration::from_millis(500); - -pub struct Timer { - sender: mpsc::Sender<()>, - timed_out: Arc, - _handle: thread::JoinHandle<()>, -} - -impl Timer { - pub fn new(timeout: Duration, on_timeout: F) -> Self - where - F: FnOnce() -> T + Send + 'static, - { - let (sender, receiver) = std::sync::mpsc::channel(); - let timed_out = Arc::new(AtomicBool::new(false)); - - let set_timed_out = timed_out.clone(); - let _handle = thread::spawn(move || { - let poll_period = Duration::min(timeout, MAX_POLL_PERIOD); - let start = Instant::now(); - - while start.elapsed() < timeout { - thread::sleep(poll_period); - - // Check if the timer has been cancelled. - if let Err(mpsc::TryRecvError::Empty) = receiver.try_recv() { - continue; - } else { - // We were cancelled or dropped, so return early and don't call back. - return; - } - } - - set_timed_out.store(true, Ordering::SeqCst); - // Timed out, so call back. - on_timeout(); - }); - - Self { - sender, - _handle, - timed_out, - } - } - - pub fn timed_out(&self) -> bool { - self.timed_out.load(Ordering::SeqCst) - } - - pub fn cancel(self) { - // Drop `self`. - } -} - -impl Drop for Timer { - fn drop(&mut self) { - // Ignore errors, because they just mean the receiver has been dropped. - let _ = self.sender.send(()); - } -} diff --git a/src/agent/coverage-legacy/src/block/pe_provider.rs b/src/agent/coverage-legacy/src/block/pe_provider.rs deleted file mode 100644 index f020e1b707..0000000000 --- a/src/agent/coverage-legacy/src/block/pe_provider.rs +++ /dev/null @@ -1,271 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. 
- -use std::collections::BTreeSet; -use std::convert::TryInto; - -use anyhow::{format_err, Result}; -use goblin::pe::PE; -use pdb::{ - AddressMap, DataSymbol, FallibleIterator, ProcedureSymbol, Rva, Source, SymbolData, PDB, -}; - -use crate::sancov::{SancovDelimiters, SancovInlineAccessScanner, SancovTable}; - -/// Basic block offset provider for uninstrumented PE modules. -pub struct PeBasicBlockProvider {} - -/// Basic block offset provider for Sancov-instrumented PE modules. -pub struct PeSancovBasicBlockProvider<'d, 'p, D> { - data: &'p [u8], - pe: &'p PE<'p>, - pdb: &'p mut PDB<'d, D>, -} - -impl<'d, 'p, D> PeSancovBasicBlockProvider<'d, 'p, D> -where - D: Source<'d> + 'd, -{ - pub fn new(data: &'p [u8], pe: &'p PE<'p>, pdb: &'p mut PDB<'d, D>) -> Self { - Self { data, pe, pdb } - } - - /// Try to provide basic block offsets using available Sancov table symbols. - /// - /// If PC tables are available and definitely well-formed, use those - /// directly. Otherwise, look for an inline counter or bool flag array, then - /// disassemble all functions to reverse the instrumentation sites. - pub fn provide(&mut self) -> Result> { - let mut visitor = SancovDelimiterVisitor::new(self.pdb.address_map()?); - - let global_symbols = self.pdb.global_symbols()?; - let mut iter = global_symbols.iter(); - - // Search symbols which delimit Sancov tables. - while let Some(symbol) = iter.next()? { - if let Ok(SymbolData::Data(data)) = symbol.parse() { - visitor.visit_data_symbol(&data)?; - } - } - - // If we found a non-empty PC table, try to parse it. - if let Some(pcs_table) = visitor.delimiters.pcs_table(true) { - // Discovering and parsing the PC table can be error-prone, if we even have it. Mine it - // for PCs if we can, with some strict assumptions. If we can't, fall back on reversing - // the inline table accesses. 
- if let Ok(blocks) = self.provide_from_pcs_table(pcs_table) { - return Ok(blocks); - } - } - - // Either the PC table was empty, or something went wrong when parsing it. - // - // If we found any inline table, then we should still be able to reverse the instrumentation - // sites by disassembling instructions that access the inline table region in expected ways. - if let Some(inline_table) = visitor.delimiters.inline_table(true) { - return self.provide_from_inline_table(inline_table); - } - - anyhow::bail!("unable to find Sancov table") - } - - // Search for instructions that access a known inline table region, and use their offsets to - // reverse the instrumented basic blocks. - fn provide_from_inline_table(&mut self, inline_table: SancovTable) -> Result> { - let mut visitor = - SancovInlineAccessVisitor::new(inline_table, self.data, self.pe, self.pdb)?; - - let debug_info = self.pdb.debug_information()?; - let mut modules = debug_info.modules()?; - - while let Some(module) = modules.next()? { - if let Some(module_info) = self.pdb.module_info(&module)? { - let mut symbols = module_info.symbols()?; - while let Some(symbol) = symbols.next()? { - if let Ok(SymbolData::Procedure(proc)) = symbol.parse() { - visitor.visit_procedure_symbol(&proc)?; - } - } - } - } - - let global_symbols = self.pdb.global_symbols()?; - let mut iter = global_symbols.iter(); - - while let Some(symbol) = iter.next()? { - if let Ok(SymbolData::Procedure(proc)) = symbol.parse() { - visitor.visit_procedure_symbol(&proc)?; - } - } - - Ok(visitor.scanner.offsets) - } - - // Try to parse instrumented VAs directly from the PC table. - // - // Currently this assumes `sizeof(uintptr_t) == 8` for the target PE. - fn provide_from_pcs_table(&mut self, pcs_table: SancovTable) -> Result> { - // Read the PE directly to extract the PCs from the PC table. 
- let parse_options = goblin::pe::options::ParseOptions::default(); - let pe_alignment = self - .pe - .header - .optional_header - .ok_or_else(|| format_err!("PE file missing optional header"))? - .windows_fields - .file_alignment; - let pe_offset = goblin::pe::utils::find_offset( - pcs_table.offset as usize, - &self.pe.sections, - pe_alignment, - &parse_options, - ); - let pe_offset = - pe_offset.ok_or_else(|| format_err!("could not find file offset for sancov table"))?; - let table_range = pe_offset..(pe_offset + pcs_table.size); - let pcs_table_data = self - .data - .get(table_range) - .ok_or_else(|| format_err!("sancov table slice out of file range"))?; - - if pcs_table_data.len() % 16 != 0 { - anyhow::bail!("invalid PC table size"); - } - - let mut pcs = BTreeSet::default(); - - let module_base: u64 = self.pe.image_base.try_into()?; - - // Each entry is a struct with 2 `uintptr_t` values: a PC, then a flag. - // We only want the PC, so start at 0 (the default) and step by 2 to - // skip the flags. - for chunk in pcs_table_data.chunks(8).step_by(2) { - let le: [u8; 8] = chunk.try_into()?; - let pc = u64::from_le_bytes(le); - let pc_offset: u32 = pc - .checked_sub(module_base) - .ok_or_else(|| { - format_err!( - "underflow when computing offset from VA: {:x} - {:x}", - pc, - module_base, - ) - })? - .try_into()?; - pcs.insert(pc_offset); - } - - Ok(pcs) - } -} - -/// Searches a PDB for data symbols that delimit various Sancov tables. -#[derive(Default)] -pub struct SancovDelimiterVisitor<'am> { - address_map: AddressMap<'am>, - delimiters: SancovDelimiters, -} - -impl<'am> SancovDelimiterVisitor<'am> { - pub fn new(address_map: AddressMap<'am>) -> Self { - let delimiters = SancovDelimiters::default(); - - Self { - address_map, - delimiters, - } - } - - /// Visit a data symbol and check if it is a known Sancov delimiter. If it is, save its value. 
- /// - /// We want to visit all delimiter symbols, since we can only determine the redundant delimiters - /// if we know that there are more compiler-specific variants present. - pub fn visit_data_symbol(&mut self, data: &DataSymbol) -> Result<()> { - let name = &*data.name.to_string(); - - if let Ok(delimiter) = name.parse() { - if let Some(Rva(offset)) = data.offset.to_rva(&self.address_map) { - self.delimiters.insert(delimiter, offset); - } else { - log::error!("unable to map internal offset to RVA"); - } - } - - Ok(()) - } -} - -pub struct SancovInlineAccessVisitor<'d, 'p> { - address_map: AddressMap<'d>, - data: &'p [u8], - pe: &'p PE<'p>, - scanner: SancovInlineAccessScanner, -} - -impl<'d, 'p> SancovInlineAccessVisitor<'d, 'p> { - pub fn new<'pdb, D>( - table: SancovTable, - data: &'p [u8], - pe: &'p PE<'p>, - pdb: &'pdb mut PDB<'d, D>, - ) -> Result - where - D: Source<'d> + 'd, - { - let address_map = pdb.address_map()?; - let base: u64 = pe.image_base.try_into()?; - let scanner = SancovInlineAccessScanner::new(base, table); - - Ok(Self { - address_map, - data, - pe, - scanner, - }) - } - - pub fn visit_procedure_symbol(&mut self, proc: &ProcedureSymbol) -> Result<()> { - let parse_options = goblin::pe::options::ParseOptions::default(); - let alignment = self - .pe - .header - .optional_header - .ok_or_else(|| format_err!("PE file missing optional header"))? - .windows_fields - .file_alignment; - - let rva: usize = proc - .offset - .to_rva(&self.address_map) - .ok_or_else(|| format_err!("unable to convert PDB offset to RVA"))? 
- .0 - .try_into()?; - - let file_offset = - goblin::pe::utils::find_offset(rva, &self.pe.sections, alignment, &parse_options) - .ok_or_else(|| format_err!("unable to find PE offset for RVA"))?; - - let range = file_offset..(file_offset + proc.len as usize); - - let data = self - .data - .get(range) - .ok_or_else(|| format_err!("invalid PE file range for procedure data"))?; - - let offset: u64 = rva.try_into()?; - let va: u64 = self.scanner.base + offset; - - if let Err(err) = self.scanner.scan(data, va) { - // Errors here are aren't fatal to the larger reversing of inline accesses. - // - // Typically, it means we attempted to disassemble local data (like jump tables). - log::warn!( - "error scanning procedure code for inline accesses; procedure = {}, error = {}", - proc.name, - err - ); - } - - Ok(()) - } -} diff --git a/src/agent/coverage-legacy/src/block/windows.rs b/src/agent/coverage-legacy/src/block/windows.rs deleted file mode 100644 index fa35e8dd82..0000000000 --- a/src/agent/coverage-legacy/src/block/windows.rs +++ /dev/null @@ -1,281 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. 
- -use std::collections::BTreeMap; -use std::process::Command; -use std::time::{Duration, Instant}; - -use anyhow::{Context, Result}; -use debugger::{BreakpointId, BreakpointType, DebugEventHandler, Debugger, ModuleLoadInfo}; - -use crate::block::CommandBlockCov; -use crate::cache::ModuleCache; -use crate::code::{CmdFilter, ModulePath}; - -pub fn record(cmd: Command, filter: CmdFilter, timeout: Duration) -> Result { - let mut cache = ModuleCache::default(); - let mut recorder = Recorder::new(&mut cache, filter); - let mut handler = RecorderEventHandler::new(&mut recorder, timeout); - handler.run(cmd)?; - Ok(recorder.into_coverage()) -} - -#[derive(Debug)] -pub struct RecorderEventHandler<'r, 'c> { - recorder: &'r mut Recorder<'c>, - started: Instant, - timed_out: bool, - timeout: Duration, -} - -impl<'r, 'c> RecorderEventHandler<'r, 'c> { - pub fn new(recorder: &'r mut Recorder<'c>, timeout: Duration) -> Self { - let started = Instant::now(); - let timed_out = false; - - Self { - recorder, - started, - timed_out, - timeout, - } - } - - pub fn time_out(&self) -> bool { - self.timed_out - } - - pub fn timeout(&self) -> Duration { - self.timeout - } - - pub fn run(&mut self, cmd: Command) -> Result<()> { - let (mut dbg, _child) = Debugger::init(cmd, self).context("initializing debugger")?; - dbg.run(self).context("running debuggee")?; - Ok(()) - } - - fn on_poll(&mut self, dbg: &mut Debugger) { - if !self.timed_out && self.started.elapsed() > self.timeout { - self.timed_out = true; - dbg.quit_debugging(); - } - } - - fn stop(&self, dbg: &mut Debugger) { - dbg.quit_debugging(); - } -} - -#[derive(Debug)] -pub struct Recorder<'c> { - breakpoints: Breakpoints, - - // Reference to allow in-memory reuse across runs. - cache: &'c mut ModuleCache, - - // Note: this could also be a reference to enable reuse across runs, to - // support implicit calculation of total coverage for a corpus. For now, - // assume callers will merge this into a separate struct when needed. 
- coverage: CommandBlockCov, - - filter: CmdFilter, -} - -impl<'c> Recorder<'c> { - pub fn new(cache: &'c mut ModuleCache, filter: CmdFilter) -> Self { - let breakpoints = Breakpoints::default(); - let coverage = CommandBlockCov::default(); - - Self { - breakpoints, - cache, - coverage, - filter, - } - } - - pub fn coverage(&self) -> &CommandBlockCov { - &self.coverage - } - - pub fn into_coverage(self) -> CommandBlockCov { - self.coverage - } - - pub fn on_create_process(&mut self, dbg: &mut Debugger, module: &ModuleLoadInfo) -> Result<()> { - log::debug!("process created: {}", module.path().display()); - - // Not necessary for PDB search, but enables use of other `dbghelp` APIs. - if let Err(err) = dbg.target().maybe_sym_initialize() { - log::error!( - "unable to initialize symbol handler for new process {}: {:?}", - module.path().display(), - err, - ); - } - - self.insert_module(dbg, module) - } - - pub fn on_load_dll(&mut self, dbg: &mut Debugger, module: &ModuleLoadInfo) -> Result<()> { - log::debug!("DLL loaded: {}", module.path().display()); - - self.insert_module(dbg, module) - } - - pub fn on_breakpoint(&mut self, dbg: &mut Debugger, id: BreakpointId) -> Result<()> { - if let Some(breakpoint) = self.breakpoints.get(id) { - if log::max_level() == log::Level::Trace { - let name = breakpoint.module.name().to_string_lossy(); - let offset = breakpoint.offset; - let pc = dbg - .read_program_counter() - .context("reading PC on breakpoint")?; - - if let Ok(sym) = dbg.get_symbol(pc) { - log::trace!( - "{:>16x}: {}+{:x} ({}+{:x})", - pc, - name, - offset, - sym.symbol(), - sym.displacement(), - ); - } else { - log::trace!("{:>16x}: {}+{:x}", pc, name, offset); - } - } - - self.coverage - .increment(breakpoint.module, breakpoint.offset); - } else { - let pc = if let Ok(pc) = dbg.read_program_counter() { - format!("{pc:x}") - } else { - "???".into() - }; - - log::error!("hit breakpoint without data, id = {}, pc = {}", id.0, pc); - } - - Ok(()) - } - - fn 
insert_module(&mut self, dbg: &mut Debugger, module: &ModuleLoadInfo) -> Result<()> { - let path = ModulePath::new(module.path().to_owned()).context("parsing module path")?; - - if !self.filter.includes_module(&path) { - log::debug!("skipping module: {}", path); - return Ok(()); - } - - // Do not pass the debuggee's actual process handle here. Any passed handle is - // used as the symbol handler context within the cache's PDB search. Instead, use - // the default internal pseudo-handle for "static" `dbghelp` usage. This lets us - // query `dbghelp` immediately upon observing the `CREATE_PROCESS_DEBUG_EVENT`, - // before we would be able to for a running debuggee. - match self.cache.fetch(&path, None) { - Ok(Some(info)) => { - let new = self.coverage.insert(&path, info.blocks.iter().copied()); - - if !new { - return Ok(()); - } - - self.breakpoints - .set(dbg, module, info.blocks.iter().copied()) - .context("setting breakpoints for module")?; - - log::debug!("set {} breakpoints for module {}", info.blocks.len(), path); - } - Ok(None) => { - log::debug!("could not find module: {}", path); - } - Err(err) => { - log::debug!("could not disassemble module {}: {:?}", path, err); - } - } - - Ok(()) - } -} - -impl<'r, 'c> DebugEventHandler for RecorderEventHandler<'r, 'c> { - fn on_create_process(&mut self, dbg: &mut Debugger, module: &ModuleLoadInfo) { - if self.recorder.on_create_process(dbg, module).is_err() { - self.stop(dbg); - } - } - - fn on_load_dll(&mut self, dbg: &mut Debugger, module: &ModuleLoadInfo) { - if self.recorder.on_load_dll(dbg, module).is_err() { - self.stop(dbg); - } - } - - fn on_breakpoint(&mut self, dbg: &mut Debugger, bp: BreakpointId) { - if self.recorder.on_breakpoint(dbg, bp).is_err() { - self.stop(dbg); - } - } - - fn on_poll(&mut self, dbg: &mut Debugger) { - self.on_poll(dbg); - } -} - -/// Relates opaque, runtime-generated breakpoint IDs to their corresponding -/// location, via module and offset. 
-#[derive(Clone, Debug, Default)] -struct Breakpoints { - // Breakpoint-associated module paths, referenced by index to save space and - // avoid copying. - modules: Vec, - - // Map of breakpoint IDs to data which pick out an code location. For a - // value `(module, offset)`, `module` is an index into `self.modules`, and - // `offset` is a VA offset relative to the module base. - registered: BTreeMap, -} - -impl Breakpoints { - pub fn get(&self, id: BreakpointId) -> Option> { - let (module_index, offset) = self.registered.get(&id).copied()?; - let module = self.modules.get(module_index)?; - Some(BreakpointData { module, offset }) - } - - pub fn set( - &mut self, - dbg: &mut Debugger, - module: &ModuleLoadInfo, - offsets: impl Iterator, - ) -> Result<()> { - // From the `debugger::ModuleLoadInfo`, create and save a `ModulePath`. - let module_path = ModulePath::new(module.path().to_owned())?; - let module_index = self.modules.len(); - self.modules.push(module_path); - - for offset in offsets { - // Register the breakpoint in the running target address space. - let id = - dbg.new_rva_breakpoint(module.name(), offset as u64, BreakpointType::OneTime)?; - - // Associate the opaque `BreakpointId` with the module and offset. - self.registered.insert(id, (module_index, offset)); - } - - log::debug!("{} total registered modules", self.modules.len()); - log::debug!("{} total registered breakpoints", self.registered.len()); - - Ok(()) - } -} - -/// Code location data associated with an opaque breakpoint ID. -#[derive(Clone, Copy, Debug)] -pub struct BreakpointData<'a> { - pub module: &'a ModulePath, - pub offset: u32, -} diff --git a/src/agent/coverage-legacy/src/cache.rs b/src/agent/coverage-legacy/src/cache.rs deleted file mode 100644 index 0a3c45dd39..0000000000 --- a/src/agent/coverage-legacy/src/cache.rs +++ /dev/null @@ -1,123 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. 
- -use std::collections::{BTreeSet, HashMap}; - -#[cfg(any(target_os = "windows", target_os = "linux"))] -use anyhow::Result; -use serde::{Deserialize, Serialize}; - -#[cfg(target_os = "windows")] -use winapi::um::winnt::HANDLE; - -use crate::code::{ModuleIndex, ModulePath}; - -#[derive(Clone, Debug, Default, Deserialize, Serialize)] -pub struct ModuleCache { - pub cached: HashMap, -} - -impl ModuleCache { - pub fn new() -> Self { - let cached = HashMap::new(); - - Self { cached } - } - - #[cfg(target_os = "linux")] - pub fn fetch(&mut self, path: &ModulePath) -> Result> { - if !self.cached.contains_key(path) { - self.insert(path)?; - } - - Ok(self.cached.get(path)) - } - - #[cfg(target_os = "windows")] - pub fn fetch( - &mut self, - path: &ModulePath, - handle: impl Into>, - ) -> Result> { - if !self.cached.contains_key(path) { - self.insert(path, handle)?; - } - - Ok(self.cached.get(path)) - } - - #[cfg(target_os = "linux")] - pub fn insert(&mut self, path: &ModulePath) -> Result<()> { - let entry = ModuleInfo::new_elf(path)?; - self.cached.insert(path.clone(), entry); - Ok(()) - } - - #[cfg(target_os = "windows")] - pub fn insert(&mut self, path: &ModulePath, handle: impl Into>) -> Result<()> { - let entry = ModuleInfo::new_pe(path, handle)?; - self.cached.insert(path.clone(), entry); - Ok(()) - } -} - -#[derive(Clone, Debug, Deserialize, Serialize)] -pub struct ModuleInfo { - /// Index of the module segments and symbol metadata. - pub module: ModuleIndex, - - /// Set of image offsets of basic blocks. 
- pub blocks: BTreeSet, -} - -impl ModuleInfo { - #[cfg(target_os = "linux")] - pub fn new_elf(path: &ModulePath) -> Result { - use crate::elf::{ElfContext, ElfSancovBasicBlockProvider}; - - let data = std::fs::read(path)?; - let elf = goblin::elf::Elf::parse(&data)?; - let module = ModuleIndex::index_elf(path.clone(), &elf)?; - - let ctx = ElfContext::new(&data, &elf)?; - let mut sancov_provider = ElfSancovBasicBlockProvider::new(ctx); - let blocks = if let Ok(blocks) = sancov_provider.provide() { - blocks - } else { - let disasm = crate::disasm::ModuleDisassembler::new(&module, &data)?; - disasm.find_blocks() - }; - - Ok(Self { module, blocks }) - } - - #[cfg(target_os = "windows")] - pub fn new_pe(path: &ModulePath, handle: impl Into>) -> Result { - use crate::block::pe_provider::PeSancovBasicBlockProvider; - - let handle = handle.into(); - - let file = std::fs::File::open(path)?; - let data = unsafe { memmap2::Mmap::map(&file)? }; - - let pe = goblin::pe::PE::parse(&data)?; - let module = ModuleIndex::index_pe(path.clone(), &pe); - - let pdb_path = crate::pdb::find_pdb_path(path.as_ref(), &pe, handle)? 
- .ok_or_else(|| anyhow::format_err!("could not find PDB for module: {}", path))?; - - let pdb = std::fs::File::open(pdb_path)?; - let mut pdb = pdb::PDB::open(pdb)?; - - let mut sancov_provider = PeSancovBasicBlockProvider::new(&data, &pe, &mut pdb); - - let blocks = if let Ok(blocks) = sancov_provider.provide() { - blocks - } else { - let bitset = crate::pe::process_module(path, &data, &pe, false, handle)?; - bitset.ones().map(|off| off as u32).collect() - }; - - Ok(Self { module, blocks }) - } -} diff --git a/src/agent/coverage-legacy/src/cobertura.rs b/src/agent/coverage-legacy/src/cobertura.rs deleted file mode 100644 index ddf7d24b70..0000000000 --- a/src/agent/coverage-legacy/src/cobertura.rs +++ /dev/null @@ -1,679 +0,0 @@ -use crate::source::SourceCoverage; -use crate::source::SourceFileCoverage; -use anyhow::Context; -use anyhow::Error; -use anyhow::Result; -use quick_xml::writer::Writer; -use std::io::Cursor; -use std::path::Path; -use std::path::PathBuf; -use std::time::{SystemTime, UNIX_EPOCH}; - -pub struct LineValues { - pub valid_lines: u64, - pub hit_lines: u64, - pub line_rate: f64, -} - -impl LineValues { - pub fn new(valid_lines: u64, hit_lines: u64) -> Self { - let line_rate = if valid_lines == 0 { - 0.0 - } else { - (hit_lines as f64) / (valid_lines as f64) - }; - - Self { - valid_lines, - hit_lines, - line_rate, - } - } -} - -// compute line values (total) for coverage xml element -pub fn compute_line_values_coverage(files: &[SourceFileCoverage]) -> LineValues { - let mut valid_lines = 0; - let mut hit_lines = 0; - for file in files { - let file_line_values = compute_line_values_package(file); - valid_lines += file_line_values.valid_lines; - hit_lines += file_line_values.hit_lines; - } - LineValues::new(valid_lines, hit_lines) -} - -// compute line values for individual file package xml element -pub fn compute_line_values_package(file: &SourceFileCoverage) -> LineValues { - let mut valid_lines = 0; - let mut hit_lines = 0; - for location in 
&file.locations { - valid_lines += 1; - if location.count > 0 { - hit_lines += 1; - } - } - LineValues::new(valid_lines, hit_lines) -} -pub fn convert_path(file: &SourceFileCoverage) -> String { - file.file.replace('\\', "/").to_lowercase() -} - -// if full file name does not have / , keep full file name -pub fn get_file_name(file: &str) -> String { - let file_name = match file.split('/').next_back() { - Some(_file_name) => file.split('/').next_back().unwrap(), - None => file, - }; - file_name.to_string() -} - -// get directory of file if valid file path, otherwise make package name include and error message -pub fn get_parent_path(path_slash: &str) -> PathBuf { - let path = Path::new(&path_slash); - let none_message = "Invalid file format: ".to_owned() + path_slash; - let parent_path = match path.file_name() { - Some(_parent_path) => path.parent().unwrap(), - None => Path::new(&none_message), - }; - parent_path.to_path_buf() -} - -pub fn cobertura(source_coverage: SourceCoverage) -> Result { - let mut writer = Writer::new_with_indent(Cursor::new(Vec::new()), b' ', 4); - - let unixtime = SystemTime::now() - .duration_since(UNIX_EPOCH) - .context("system time before unix epoch")? 
- .as_secs(); - - let coverage_line_values = compute_line_values_coverage(&source_coverage.files); - writer - .create_element("coverage") - .with_attributes([ - ( - "line-rate", - format!("{:.02}", coverage_line_values.line_rate).as_str(), - ), - ("branch-rate", "0"), - ( - "lines-covered", - coverage_line_values.hit_lines.to_string().as_str(), - ), - ( - "lines-valid", - coverage_line_values.valid_lines.to_string().as_str(), - ), - ("branches-covered", "0"), - ("branches-valid", "0"), - ("complexity", "0"), - ("version", "0.1"), - ("timestamp", unixtime.to_string().as_str()), - ]) - .write_inner_content(|writer| { - writer - .create_element("packages") - .write_inner_content(|writer| { - // path (excluding file name) is package name for better results with ReportGenerator - // class name is only file name (no path) - for file in &source_coverage.files { - write_file(writer, file)?; - } - - Ok(()) - })?; - - Ok(()) - })?; - - Ok(String::from_utf8(writer.into_inner().into_inner())?) -} - -fn write_file( - writer: &mut Writer>>, - file: &SourceFileCoverage, -) -> quick_xml::Result<()> { - let path = convert_path(file); - let parent_path = get_parent_path(&path); - let package_line_values = compute_line_values_package(file); - let class_name = get_file_name(&path); - - writer - .create_element("package") - .with_attributes([ - ("name", parent_path.display().to_string().as_str()), - ( - "line-rate", - format!("{:.02}", package_line_values.line_rate).as_str(), - ), - ("branch-rate", "0"), - ("complexity", "0"), - ]) - .write_inner_content(|writer| { - writer - .create_element("classes") - .write_inner_content(|writer| { - writer - .create_element("class") - .with_attributes([ - ("name", class_name.as_str()), - ("filename", path.as_str()), - ( - "line-rate", - format!("{:.02}", package_line_values.line_rate).as_str(), - ), - ("branch-rate", "0"), - ("complexity", "0"), - ]) - .write_inner_content(|writer| { - writer - .create_element("lines") - 
.write_inner_content(|writer| { - let line_locations = &file.locations; - for location in line_locations { - writer - .create_element("line") - .with_attributes([ - ("number", location.line.to_string().as_str()), - ("hits", location.count.to_string().as_str()), - ("branch", "false"), - ]) - .write_empty()?; - } - Ok(()) - })?; - Ok(()) - })?; - Ok(()) - })?; - Ok(()) - })?; - Ok(()) -} - -#[cfg(test)] - -mod tests { - use super::*; - use crate::source::SourceCoverageLocation; - use anyhow::Result; - use pretty_assertions::assert_eq; - - #[test] - fn test_cobertura_conversion_windows_to_posix_path() { - let coverage_locations_vec1 = vec![SourceCoverageLocation { - line: 5, - column: None, - count: 3, - }]; - - let file = SourceFileCoverage { - locations: coverage_locations_vec1, - file: "C:\\Users\\file1.txt".to_string(), - }; - - let path = convert_path(&file); - assert_eq!(&path, "c:/users/file1.txt"); - } - - #[test] - fn test_cobertura_conversion_windows_to_posix_parent_path() { - let coverage_locations_vec1 = vec![SourceCoverageLocation { - line: 5, - column: None, - count: 3, - }]; - - let file = SourceFileCoverage { - locations: coverage_locations_vec1, - file: "C:\\Users\\file1.txt".to_string(), - }; - - let path = convert_path(&file); - let parent_path = get_parent_path(&path); - assert_eq!(&(parent_path.display().to_string()), "c:/users"); - } - - #[test] - fn test_cobertura_conversion_posix_to_posix_path() { - let coverage_locations_vec1 = vec![SourceCoverageLocation { - line: 5, - column: None, - count: 3, - }]; - - let file = SourceFileCoverage { - locations: coverage_locations_vec1, - file: "C:/Users/file1.txt".to_string(), - }; - - let path = convert_path(&file); - - assert_eq!(&path, "c:/users/file1.txt"); - } - - #[test] - fn test_cobertura_conversion_posix_to_posix_parent_path() { - let coverage_locations_vec1 = vec![SourceCoverageLocation { - line: 5, - column: None, - count: 3, - }]; - - let file = SourceFileCoverage { - locations: 
coverage_locations_vec1, - file: "C:/Users/file1.txt".to_string(), - }; - - let path = convert_path(&file); - let parent_path = get_parent_path(&path); - - assert_eq!(&(parent_path.display().to_string()), "c:/users"); - } - - #[test] - fn test_cobertura_invalid_windows_path() { - let coverage_locations_vec1 = vec![SourceCoverageLocation { - line: 5, - column: None, - count: 3, - }]; - - let file = SourceFileCoverage { - locations: coverage_locations_vec1, - file: "C:\\Users\\file\\..".to_string(), - }; - - let path = convert_path(&file); - - assert_eq!(&path, "c:/users/file/.."); - } - - #[test] - fn test_cobertura_invalid_windows_parent_path() { - let coverage_locations_vec1 = vec![SourceCoverageLocation { - line: 5, - column: None, - count: 3, - }]; - - let file = SourceFileCoverage { - locations: coverage_locations_vec1, - file: "C:\\Users\\file\\..".to_string(), - }; - - let path = convert_path(&file); - let parent_path = get_parent_path(&path); - - assert_eq!( - &(parent_path.display().to_string()), - "Invalid file format: c:/users/file/.." - ); - } - - #[test] - fn test_cobertura_invalid_posix_path() { - let coverage_locations_vec1 = vec![SourceCoverageLocation { - line: 5, - column: None, - count: 3, - }]; - - let file = SourceFileCoverage { - locations: coverage_locations_vec1, - file: "C:/Users/file/..".to_string(), - }; - - let path = convert_path(&file); - assert_eq!(&path, "c:/users/file/.."); - } - - #[test] - fn test_cobertura_invalid_posix_parent_path() { - let coverage_locations_vec1 = vec![SourceCoverageLocation { - line: 5, - column: None, - count: 3, - }]; - - let file = SourceFileCoverage { - locations: coverage_locations_vec1, - file: "C:/Users/file/..".to_string(), - }; - - let path = convert_path(&file); - let parent_path = get_parent_path(&path); - - assert_eq!( - &(parent_path.display().to_string()), - "Invalid file format: c:/users/file/.." 
- ); - } - - #[test] - fn test_cobertura_source_to_cobertura_mixed() -> Result<()> { - let coverage_locations_vec1 = vec![ - SourceCoverageLocation { - line: 5, - column: None, - count: 3, - }, - SourceCoverageLocation { - line: 10, - column: None, - count: 0, - }, - ]; - - let coverage_locations_vec2 = vec![SourceCoverageLocation { - line: 1, - column: None, - count: 0, - }]; - - let coverage_locations_vec3 = vec![SourceCoverageLocation { - line: 1, - column: None, - count: 1, - }]; - - let coverage_locations_vec4 = vec![SourceCoverageLocation { - line: 1, - column: None, - count: 0, - }]; - - let file_coverage_vec1 = vec![ - SourceFileCoverage { - locations: coverage_locations_vec1, - file: "C:\\Users\\file1.txt".to_string(), - }, - SourceFileCoverage { - locations: coverage_locations_vec2, - file: "C:/Users/file2.txt".to_string(), - }, - SourceFileCoverage { - locations: coverage_locations_vec3, - file: "C:\\Users\\file\\..".to_string(), - }, - SourceFileCoverage { - locations: coverage_locations_vec4, - file: "C:/Users/file/..".to_string(), - }, - ]; - - let source_coverage_result = cobertura(SourceCoverage { - files: file_coverage_vec1, - }); - - let unixtime = SystemTime::now() - .duration_since(UNIX_EPOCH) - .context("system time before unix epoch")? 
- .as_secs(); - - let expected = format!( - r#" - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -"# - ); - - assert_eq!(source_coverage_result?, expected); - Ok(()) - } - - #[test] - fn test_cobertura_source_to_cobertura_posix_paths() -> Result<()> { - let coverage_locations_vec1 = vec![ - SourceCoverageLocation { - line: 5, - column: None, - count: 3, - }, - SourceCoverageLocation { - line: 10, - column: None, - count: 0, - }, - ]; - - let coverage_locations_vec2 = vec![SourceCoverageLocation { - line: 1, - column: None, - count: 0, - }]; - - let coverage_locations_vec3 = vec![SourceCoverageLocation { - line: 1, - column: None, - count: 1, - }]; - - let coverage_locations_vec4 = vec![SourceCoverageLocation { - line: 1, - column: None, - count: 0, - }]; - - let file_coverage_vec1 = vec![ - SourceFileCoverage { - locations: coverage_locations_vec1, - file: "C:/Users/file1.txt".to_string(), - }, - SourceFileCoverage { - locations: coverage_locations_vec2, - file: "C:/Users/file2.txt".to_string(), - }, - SourceFileCoverage { - locations: coverage_locations_vec3, - file: "C:/Users/file/..".to_string(), - }, - SourceFileCoverage { - locations: coverage_locations_vec4, - file: "C:/Users/file/..".to_string(), - }, - ]; - - let source_coverage_result = cobertura(SourceCoverage { - files: file_coverage_vec1, - }); - - let unixtime = SystemTime::now() - .duration_since(UNIX_EPOCH) - .context("system time before unix epoch")? 
- .as_secs(); - - let expected = format!( - r#" - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -"# - ); - - assert_eq!(source_coverage_result?, expected); - Ok(()) - } - - #[test] - fn test_cobertura_source_to_cobertura_windows_paths() -> Result<()> { - let coverage_locations_vec1 = vec![ - SourceCoverageLocation { - line: 5, - column: None, - count: 3, - }, - SourceCoverageLocation { - line: 10, - column: None, - count: 0, - }, - ]; - - let coverage_locations_vec2 = vec![SourceCoverageLocation { - line: 1, - column: None, - count: 0, - }]; - - let coverage_locations_vec3 = vec![SourceCoverageLocation { - line: 1, - column: None, - count: 1, - }]; - - let coverage_locations_vec4 = vec![SourceCoverageLocation { - line: 1, - column: None, - count: 0, - }]; - - let file_coverage_vec1 = vec![ - SourceFileCoverage { - locations: coverage_locations_vec1, - file: "C:\\Users\\file1.txt".to_string(), - }, - SourceFileCoverage { - locations: coverage_locations_vec2, - file: "C:\\Users\\file2.txt".to_string(), - }, - SourceFileCoverage { - locations: coverage_locations_vec3, - file: "C:\\Users\\file\\..".to_string(), - }, - SourceFileCoverage { - locations: coverage_locations_vec4, - file: "C:\\Users\\file\\..".to_string(), - }, - ]; - - let source_coverage_result = cobertura(SourceCoverage { - files: file_coverage_vec1, - }); - - let unixtime = SystemTime::now() - .duration_since(UNIX_EPOCH) - .context("system time before unix epoch")? 
- .as_secs(); - - let expected = format!( - r#" - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -"# - ); - - assert_eq!(source_coverage_result?, expected); - Ok(()) - } -} diff --git a/src/agent/coverage-legacy/src/cobertura_test.xml b/src/agent/coverage-legacy/src/cobertura_test.xml deleted file mode 100644 index a484b4e502..0000000000 --- a/src/agent/coverage-legacy/src/cobertura_test.xml +++ /dev/null @@ -1,42 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/src/agent/coverage-legacy/src/code.rs b/src/agent/coverage-legacy/src/code.rs deleted file mode 100644 index 32b5b8f0c9..0000000000 --- a/src/agent/coverage-legacy/src/code.rs +++ /dev/null @@ -1,443 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. - -use std::borrow::Borrow; -use std::ffi::OsStr; -use std::fmt; -use std::ops::Range; -use std::path::{Path, PathBuf}; - -use anyhow::{bail, Result}; -use regex::RegexSet; -use serde::{Deserialize, Serialize}; - -use crate::filter::Filter; -use crate::region::{Region, RegionIndex}; - -/// `PathBuf` that is guaranteed to be canonicalized and have a file name. -#[derive(Clone, Debug, Deserialize, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize)] -#[serde(transparent)] -pub struct ModulePath { - path: PathBuf, -} - -impl fmt::Display for ModulePath { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}", self.path.display()) - } -} - -impl ModulePath { - /// Validate that `path` is absolute and has a filename. 
- pub fn new(path: PathBuf) -> Result { - if path.is_relative() { - bail!("module path is not absolute"); - } - - if path.file_name().is_none() { - bail!("module path has no file name"); - } - - Ok(Self { path }) - } - - pub fn existing(path: impl AsRef) -> Result { - let path = dunce::canonicalize(path)?; - Self::new(path) - } - - pub fn path(&self) -> &Path { - &self.path - } - - pub fn path_lossy(&self) -> String { - self.path.to_string_lossy().into_owned() - } - - pub fn name(&self) -> &OsStr { - // Unwrap checked in constructor. - self.path.file_name().unwrap() - } - - pub fn name_lossy(&self) -> String { - self.name().to_string_lossy().into_owned() - } -} - -impl AsRef for ModulePath { - fn as_ref(&self) -> &Path { - self.path() - } -} - -impl AsRef for ModulePath { - fn as_ref(&self) -> &OsStr { - self.path().as_ref() - } -} - -impl Borrow for ModulePath { - fn borrow(&self) -> &Path { - self.path() - } -} - -impl From for PathBuf { - fn from(module_path: ModulePath) -> PathBuf { - module_path.path - } -} - -/// Index over an executable module and its symbols. -#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] -pub struct ModuleIndex { - /// Absolute path to the module's backing file. - pub path: ModulePath, - - /// Preferred virtual address of the module's base image. - pub base_va: u64, - - /// Index over the module's symbols. - pub symbols: SymbolIndex, -} - -impl ModuleIndex { - /// Build a new index over a parsed ELF module. - #[cfg(target_os = "linux")] - pub fn index_elf(path: ModulePath, elf: &goblin::elf::Elf) -> Result { - use anyhow::format_err; - use goblin::elf::program_header::PT_LOAD; - - // Calculate the module base address as the lowest preferred VA of any loadable segment. 
- // - // https://refspecs.linuxbase.org/elf/gabi4+/ch5.pheader.html#base_address - let base_va = elf - .program_headers - .iter() - .filter(|h| h.p_type == PT_LOAD) - .map(|h| h.p_vaddr) - .min() - .ok_or_else(|| format_err!("no loadable segments for ELF object ({})", path))?; - - let mut symbols = SymbolIndex::default(); - - for sym in elf.syms.iter() { - if sym.st_size == 0 { - log::debug!("skipping size 0 symbol: {:x?}", sym); - continue; - } - - if sym.is_function() { - let name = match elf.strtab.get_at(sym.st_name) { - None => { - log::error!("symbol not found in symbol string table: {:?}", sym); - continue; - } - Some(name) => name.to_owned(), - }; - - // For executables and shared objects, `st_value` contains the VA of the symbol. - // - // https://refspecs.linuxbase.org/elf/gabi4+/ch4.symtab.html#symbol_value - let sym_va = sym.st_value; - - // The module-relative offset of the mapped symbol is immediate. - let image_offset = sym_va - base_va; - - // We want to make it easy to read the symbol from the file on disk. To do this, we - // need to compute its file offset. - // - // A symbol is defined relative to some section, identified by `st_shndx`, an index - // into the section header table. We'll use the section header to compute the file - // offset of the symbol. - let section = elf - .section_headers - .get(sym.st_shndx) - .cloned() - .ok_or_else(|| format_err!("invalid section table index for symbol"))?; - - // If mapped into a segment, `sh_addr` contains the VA of the section image, - // consistent with the `p_vaddr` of the segment. - // - // https://refspecs.linuxbase.org/elf/gabi4+/ch4.sheader.html#section_header - let section_va = section.sh_addr; - - // The offset of the symbol relative to its section (both in-file and when mapped). - let sym_section_offset = sym_va - section_va; - - // We have the file offset for the section via `sh_offset`, and the offset of the - // symbol within the section. 
From this, calculate the file offset for the symbol, - // which we can use to index into `data`. - let sym_file_offset = section.sh_offset + sym_section_offset; - - let symbol = Symbol::new(name, sym_file_offset, image_offset, sym.st_size); - - match symbol { - Ok(entry) => { - let inserted = symbols.index.insert(entry.clone()); - if !inserted { - log::error!("failed to insert symbol index entry: {:x?}", entry); - } - } - Err(err) => { - log::error!("invalid symbol: err = {}", err); - } - } - } - } - - Ok(Self { - path, - base_va, - symbols, - }) - } - - /// Build a new index over a parsed PE module. - #[cfg(target_os = "windows")] - pub fn index_pe(path: ModulePath, pe: &goblin::pe::PE) -> Self { - let base_va = pe.image_base as u64; - - // Not yet implemented. - let symbols = SymbolIndex::default(); - - Self { - path, - base_va, - symbols, - } - } -} - -#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)] -#[serde(transparent)] -pub struct SymbolIndex { - pub index: RegionIndex, -} - -impl SymbolIndex { - pub fn iter(&self) -> impl Iterator { - self.index.iter() - } - - /// Find the symbol metadata for the image-relative `offset`. - pub fn find(&self, offset: u64) -> Option<&Symbol> { - self.index.find(offset) - } -} - -#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)] -pub struct Symbol { - /// Raw symbol name, possibly mangled. - pub name: String, - - /// File offset of the symbol definition in the on-disk module. - pub file_offset: u64, - - /// Module-relative offset of the mapped symbol. - pub image_offset: u64, - - /// Total size in bytes of the symbol definition. 
- pub size: u64, -} - -impl Symbol { - pub fn new(name: String, file_offset: u64, image_offset: u64, size: u64) -> Result { - if name.is_empty() { - bail!("symbol name cannot be empty"); - } - - if size == 0 { - bail!("symbol size must be nonzero"); - } - - if file_offset.checked_add(size).is_none() { - bail!("symbol size must not overflow file offset"); - } - - if image_offset.checked_add(size).is_none() { - bail!("symbol size must not overflow image offset"); - } - - Ok(Self { - name, - file_offset, - image_offset, - size, - }) - } - - pub fn file_range(&self) -> Range { - let lo = self.file_offset; - - // Overflow checked in constructor. - let hi = lo + self.size; - - lo..hi - } - - pub fn file_range_usize(&self) -> Range { - let lo = self.file_offset as usize; - - // Overflow checked in constructor. - let hi = lo + (self.size as usize); - - lo..hi - } - - pub fn image_range(&self) -> Range { - let lo = self.image_offset; - let hi = lo + self.size; - lo..hi - } - - pub fn contains_file_offset(&self, offset: u64) -> bool { - self.file_range().contains(&offset) - } - - pub fn contains_image_offset(&self, offset: u64) -> bool { - self.image_range().contains(&offset) - } -} - -/// Symbol metadata defines a `Region` relative to its process image. -impl Region for Symbol { - fn base(&self) -> u64 { - self.image_offset - } - - fn size(&self) -> u64 { - self.size - } -} - -#[derive(Clone, Debug, Default, Deserialize, Serialize)] -#[serde(transparent)] -pub struct CmdFilterDef { - defs: Vec, -} - -#[derive(Clone, Debug, Deserialize, Serialize)] -struct ModuleRuleDef { - pub module: String, - - #[serde(flatten)] - pub rule: RuleDef, -} - -/// User-facing encoding of a module-tracking rule. -/// -/// We use an intermediate type to expose a rich and easily-updated user-facing -/// format for expressing rules, while decoupling the `serde` machinery from the -/// normalized type used for business logic. 
-#[derive(Clone, Debug, Deserialize, Serialize)] -#[serde(untagged)] -enum RuleDef { - Include { - include: bool, - }, - Exclude { - exclude: bool, - }, - - // Temporarily disable symbol filtering rules. - #[cfg_attr(not(feature = "symbol-filter"), serde(skip), allow(unused))] - Filter(Box), -} - -/// A normalized encoding of a module-tracking rule. -#[derive(Clone, Debug)] -enum Rule { - /// Asserts that the entire module should be be tracked (and its symbols - /// included), or ignored, and its symbols excluded. - /// - /// The implied symbol tracking behavior could be encoded by a filter, but a - /// distinction at this level lets us avoid parsing modules that we want to - /// ignore. - IncludeModule(bool), - - /// The entire module should be tracked and parsed, with a filter applied to - /// its symbols. - FilterSymbols(Box), -} - -impl From for Rule { - fn from(def: RuleDef) -> Self { - match def { - RuleDef::Exclude { exclude } => Rule::IncludeModule(!exclude), - RuleDef::Include { include } => Rule::IncludeModule(include), - RuleDef::Filter(filter) => Rule::FilterSymbols(filter), - } - } -} - -/// Module and symbol-tracking rules to be applied to a command. -#[derive(Clone, Debug)] -pub struct CmdFilter { - regexes: RegexSet, - rules: Vec, -} - -impl CmdFilter { - pub fn new(cmd: CmdFilterDef) -> Result { - let mut modules = vec![]; - let mut rules = vec![]; - - for def in cmd.defs { - modules.push(def.module); - rules.push(def.rule.into()); - } - - let regexes = RegexSet::new(&modules)?; - - Ok(Self { regexes, rules }) - } - - pub fn includes_module(&self, module: &ModulePath) -> bool { - match self.regexes.matches(&module.path_lossy()).iter().next() { - Some(index) => { - // In-bounds by construction. - match &self.rules[index] { - Rule::IncludeModule(included) => *included, - Rule::FilterSymbols(_) => { - // A filtered module is implicitly tracked. - true - } - } - } - None => { - // Track modules by default. 
- true - } - } - } - - pub fn includes_symbol(&self, module: &ModulePath, symbol: impl AsRef) -> bool { - match self.regexes.matches(&module.path_lossy()).iter().next() { - Some(index) => { - // In-bounds by construction. - match &self.rules[index] { - Rule::IncludeModule(included) => *included, - Rule::FilterSymbols(filter) => filter.includes(symbol.as_ref()), - } - } - None => { - // Include symbols by default. - true - } - } - } -} - -impl Default for CmdFilter { - fn default() -> Self { - let def = CmdFilterDef::default(); - - // An empty set of filter definitions has no regexes, which means when - // constructing, we never internally risk compiling an invalid regex. - Self::new(def).expect("unreachable") - } -} - -#[cfg(test)] -mod tests; diff --git a/src/agent/coverage-legacy/src/code/tests.rs b/src/agent/coverage-legacy/src/code/tests.rs deleted file mode 100644 index 938fcea9ee..0000000000 --- a/src/agent/coverage-legacy/src/code/tests.rs +++ /dev/null @@ -1,241 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. 
- -use super::*; - -#[test] -fn test_module_filter_def_include_bool() { - let text = r#"{ "module": "abc.exe", "include": true }"#; - let def: ModuleRuleDef = serde_json::from_str(text).unwrap(); - - assert_eq!(def.module, "abc.exe"); - assert!(matches!(def.rule, RuleDef::Include { include: true })); - - let text = r#"{ "module": "abc.exe", "include": false }"#; - let def: ModuleRuleDef = serde_json::from_str(text).unwrap(); - - assert_eq!(def.module, "abc.exe"); - assert!(matches!(def.rule, RuleDef::Include { include: false })); -} - -#[test] -fn test_module_filter_def_exclude_bool() { - let text = r#"{ "module": "abc.exe", "exclude": true }"#; - let def: ModuleRuleDef = serde_json::from_str(text).unwrap(); - - assert_eq!(def.module, "abc.exe"); - assert!(matches!(def.rule, RuleDef::Exclude { exclude: true })); - - let text = r#"{ "module": "abc.exe", "exclude": false }"#; - let def: ModuleRuleDef = serde_json::from_str(text).unwrap(); - - assert_eq!(def.module, "abc.exe"); - assert!(matches!(def.rule, RuleDef::Exclude { exclude: false })); -} - -#[cfg(feature = "symbol-filter")] -#[test] -fn test_module_filter_def_include_filter() { - let text = r#"{ "module": "abc.exe", "include": [] }"#; - let def: ModuleRuleDef = serde_json::from_str(text).unwrap(); - - assert_eq!(def.module, "abc.exe"); - - if let RuleDef::Filter(filter) = def.rule { - assert!(matches!(*filter, Filter::Include(_))); - } else { - panic!("expected a `Filter` rule"); - } - - let text = r#"{ "module": "abc.exe", "include": [ "^parse_data$" ] }"#; - let def: ModuleRuleDef = serde_json::from_str(text).unwrap(); - - assert_eq!(def.module, "abc.exe"); - - if let RuleDef::Filter(filter) = def.rule { - assert!(matches!(*filter, Filter::Include(_))); - } else { - panic!("expected a `Filter` rule"); - } -} - -#[cfg(feature = "symbol-filter")] -#[test] -fn test_module_filter_def_exclude_filter() { - let text = r#"{ "module": "abc.exe", "exclude": [] }"#; - let def: ModuleRuleDef = 
serde_json::from_str(text).unwrap(); - - if let RuleDef::Filter(filter) = def.rule { - assert!(matches!(*filter, Filter::Exclude(_))); - } else { - panic!("expected a `Filter` rule"); - } - - let text = r#"{ "module": "abc.exe", "exclude": [ "^parse_data$" ] }"#; - let def: ModuleRuleDef = serde_json::from_str(text).unwrap(); - - if let RuleDef::Filter(filter) = def.rule { - assert!(matches!(*filter, Filter::Exclude(_))); - } else { - panic!("expected a `Filter` rule"); - } -} - -#[cfg(feature = "symbol-filter")] -#[test] -fn test_include_exclude() { - let include_false = Rule::from(RuleDef::Include { include: false }); - assert!(matches!(include_false, Rule::IncludeModule(false))); - - let exclude_true = Rule::from(RuleDef::Exclude { exclude: true }); - assert!(matches!(exclude_true, Rule::IncludeModule(false))); - - let include_true = Rule::from(RuleDef::Include { include: true }); - assert!(matches!(include_true, Rule::IncludeModule(true))); - - let exclude_false = Rule::from(RuleDef::Exclude { exclude: false }); - assert!(matches!(exclude_false, Rule::IncludeModule(true))); -} - -#[cfg(feature = "symbol-filter")] -macro_rules! 
from_json { - ($tt: tt) => {{ - let text = stringify!($tt); - let def: CmdFilterDef = - serde_json::from_str(text).expect("static test data was invalid JSON"); - CmdFilter::new(def).expect("static test JSON was invalid") - }}; -} - -#[cfg(feature = "symbol-filter")] -#[cfg(target_os = "windows")] -const EXE: &str = r"c:\bin\fuzz.exe"; - -#[cfg(feature = "symbol-filter")] -#[cfg(target_os = "linux")] -const EXE: &str = "/bin/fuzz.exe"; - -#[cfg(feature = "symbol-filter")] -#[cfg(target_os = "windows")] -const LIB: &str = r"c:\lib\libpthread.dll"; - -#[cfg(feature = "symbol-filter")] -#[cfg(target_os = "linux")] -const LIB: &str = "/lib/libpthread.so.0"; - -#[cfg(feature = "symbol-filter")] -fn module(s: &str) -> ModulePath { - ModulePath::new(s.into()).unwrap() -} - -#[cfg(feature = "symbol-filter")] -#[test] -fn test_cmd_filter_empty_def() { - let filter = from_json!([]); - - // All modules and symbols are included by default. - - let exe = module(EXE); - assert!(filter.includes_module(&exe)); - assert!(filter.includes_symbol(&exe, "main")); - assert!(filter.includes_symbol(&exe, "_start")); - assert!(filter.includes_symbol(&exe, "LLVMFuzzerTestOneInput")); - assert!(filter.includes_symbol(&exe, "__asan_memcpy")); - assert!(filter.includes_symbol(&exe, "__asan_load8")); - - let lib = module(LIB); - assert!(filter.includes_module(&lib)); - assert!(filter.includes_symbol(&lib, "pthread_join")); - assert!(filter.includes_symbol(&lib, "pthread_yield")); -} - -#[cfg(feature = "symbol-filter")] -#[test] -fn test_cmd_filter_module_include_list() { - let filter = from_json!([ - { - "module": "fuzz.exe$", - "include": ["^main$", "LLVMFuzzerTestOneInput"] - } - ]); - - // The filtered module and its matching symbols are included. 
- let exe = module(EXE); - assert!(filter.includes_module(&exe)); - assert!(!filter.includes_symbol(&exe, "_start")); - assert!(filter.includes_symbol(&exe, "main")); - assert!(filter.includes_symbol(&exe, "LLVMFuzzerTestOneInput")); - assert!(!filter.includes_symbol(&exe, "__asan_memcpy")); - assert!(!filter.includes_symbol(&exe, "__asan_load8")); - - // Other modules and their symbols are included by default. - let lib = module(LIB); - assert!(filter.includes_module(&lib)); - assert!(filter.includes_symbol(&lib, "pthread_join")); - assert!(filter.includes_symbol(&lib, "pthread_yield")); - assert!(filter.includes_symbol(&lib, "__asan_memcpy")); - assert!(filter.includes_symbol(&lib, "__asan_load8")); -} - -#[cfg(feature = "symbol-filter")] -#[test] -fn test_cmd_filter_exclude_list() { - let filter = from_json!([ - { - "module": "fuzz.exe$", - "exclude": ["^_start", "^__asan"] - } - ]); - - // The filtered module is included, and its matching symbols are excluded. - let exe = module(EXE); - assert!(filter.includes_module(&exe)); - assert!(!filter.includes_symbol(&exe, "_start")); - assert!(filter.includes_symbol(&exe, "main")); - assert!(filter.includes_symbol(&exe, "LLVMFuzzerTestOneInput")); - assert!(!filter.includes_symbol(&exe, "__asan_memcpy")); - assert!(!filter.includes_symbol(&exe, "__asan_load8")); - assert!(!filter.includes_symbol(&exe, "_start")); - - // Other modules and their symbols are included by default. - let lib = module(LIB); - assert!(filter.includes_module(&lib)); - assert!(filter.includes_symbol(&lib, "pthread_join")); - assert!(filter.includes_symbol(&lib, "pthread_yield")); - assert!(filter.includes_symbol(&lib, "__asan_memcpy")); - assert!(filter.includes_symbol(&lib, "__asan_load8")); -} - -#[cfg(feature = "symbol-filter")] -#[test] -fn test_cmd_filter_include_list_and_exclude_default() { - // The 2nd rule in this list excludes all modules and symbols not explicitly - // included in the 1st rule. 
- let filter = from_json!([ - { - "module": "fuzz.exe$", - "include": ["^main$", "LLVMFuzzerTestOneInput"] - }, - { - "module": ".*", - "exclude": true - } - ]); - - // The filtered module is included, and only matching rules are included. - let exe = module(EXE); - assert!(filter.includes_module(&exe)); - assert!(!filter.includes_symbol(&exe, "_start")); - assert!(filter.includes_symbol(&exe, "main")); - assert!(filter.includes_symbol(&exe, "LLVMFuzzerTestOneInput")); - assert!(!filter.includes_symbol(&exe, "__asan_memcpy")); - assert!(!filter.includes_symbol(&exe, "__asan_load8")); - - // Other modules and their symbols are excluded by default. - let lib = module(LIB); - assert!(!filter.includes_module(&lib)); - assert!(!filter.includes_symbol(&lib, "pthread_yield")); - assert!(!filter.includes_symbol(&lib, "pthread_join")); - assert!(!filter.includes_symbol(&lib, "__asan_memcpy")); - assert!(!filter.includes_symbol(&lib, "__asan_load8")); -} diff --git a/src/agent/coverage-legacy/src/debuginfo.rs b/src/agent/coverage-legacy/src/debuginfo.rs deleted file mode 100644 index 9b7eb1bf3b..0000000000 --- a/src/agent/coverage-legacy/src/debuginfo.rs +++ /dev/null @@ -1,174 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. - -use std::collections::{HashMap, HashSet}; -use std::fs; -use std::io; -use std::path::{Path, PathBuf}; - -use anyhow::Result; -use symbolic::{ - debuginfo::Object, - symcache::{SymCache, SymCacheWriter}, -}; - -#[cfg(windows)] -use goblin::pe::PE; - -#[cfg(windows)] -use symbolic::debuginfo::pe; - -/// Caching provider of debug info for executable code modules. -#[derive(Default)] -pub struct DebugInfo { - // Cached debug info, keyed by module path. - modules: HashMap, - - // Set of module paths known to lack debug info. - no_debug_info: HashSet, -} - -impl DebugInfo { - /// Try to load debug info for a module. - /// - /// If debug info was founded and loaded (now or previously), returns - /// `true`. 
If the module does not have debug info, returns `false`. - pub fn load_module(&mut self, module: PathBuf) -> Result { - if self.no_debug_info.contains(&module) { - return Ok(false); - } - - if self.modules.get(&module).is_some() { - return Ok(true); - } - - let info = match ModuleDebugInfo::load(&module)? { - Some(info) => info, - None => { - self.no_debug_info.insert(module); - return Ok(false); - } - }; - - self.modules.insert(module, info); - - Ok(true) - } - - /// Fetch debug info for `module`, if loaded. - /// - /// Does not attempt to load debug info for the module. - pub fn get(&self, module: impl AsRef) -> Option<&ModuleDebugInfo> { - self.modules.get(module.as_ref()) - } -} - -/// Debug info for a single executable module. -pub struct ModuleDebugInfo { - /// Backing debug info file data for the module. - /// - /// May not include the actual executable code. - pub object: Object<'static>, - - /// Cache which allows efficient source line lookups. - pub source: SymCache<'static>, -} - -impl ModuleDebugInfo { - /// Load debug info for a module. - /// - /// Returns `None` when the module was found and loadable, but no matching - /// debug info could be found. - /// - /// Leaks module and symbol data. - fn load(module: &Path) -> Result> { - // Used when `cfg(windows)`. - #[allow(unused_mut)] - let mut data = fs::read(module)?.into_boxed_slice(); - - // Conditional so we can use `dbghelp`. - #[cfg(windows)] - { - // If our module is a PE file, the debug info will be in the PDB. - // - // We will need a similar check to support split DWARF. - let is_pe = pe::PeObject::test(&data); - if is_pe { - let pe = PE::parse(&data)?; - - // Search the symbol path for a PDB for this PE, which we'll use instead. - if let Some(pdb) = crate::pdb::find_pdb_path(module, &pe, None)? { - data = fs::read(pdb)?.into_boxed_slice(); - } - } - } - - // Now we're more sure we want this data. Leak it so the parsed object - // will have a `static` lifetime. 
- let data = Box::leak(data); - - // Save a raw pointer to the file data. If object parsing fails, or - // there is no debuginfo, we will use this to avoid leaking memory. - let data_ptr = data as *mut _; - - let object = match Object::parse(data) { - Ok(object) => { - if !object.has_debug_info() { - // Drop the object to drop its static references to the leaked data. - drop(object); - - // Reconstruct to free data on drop. - // - // Safety: we leaked this box locally, and only `object` had a reference to it - // via `Object::parse()`. We manually dropped `object`, so the raw pointer is no - // longer aliased. - unsafe { - drop(Box::from_raw(data_ptr)); - } - - return Ok(None); - } - - object - } - Err(err) => { - // Reconstruct to free data on drop. - // - // Safety: we leaked this box locally, and only passed the leaked ref once, to - // `Object::parse()`. In this branch, it returned an `ObjectError`, which does not - // hold a reference to the leaked data. The raw pointer is no longer aliased, so we - // can both free its referent and also return the error. - unsafe { - drop(Box::from_raw(data_ptr)); - } - - return Err(err.into()); - } - }; - - let cursor = io::Cursor::new(vec![]); - let cursor = SymCacheWriter::write_object(&object, cursor)?; - let cache_data = Box::leak(cursor.into_inner().into_boxed_slice()); - - // Save a raw pointer to the cache data. If cache parsing fails, we will use this to - // avoid leaking memory. - let cache_data_ptr = cache_data as *mut _; - - match SymCache::parse(cache_data) { - Ok(source) => Ok(Some(Self { object, source })), - Err(err) => { - // Reconstruct to free data on drop. - // - // Safety: we leaked this box locally, and only passed the leaked ref once, to - // `SymCache::parse()`. In this branch, it returned a `SymCacheError`, which does - // not hold a reference to the leaked data. The pointer is no longer aliased, so we - // can both free its referent and also return the error. 
- unsafe { - drop(Box::from_raw(cache_data_ptr)); - } - - Err(err.into()) - } - } - } -} diff --git a/src/agent/coverage-legacy/src/demangle.rs b/src/agent/coverage-legacy/src/demangle.rs deleted file mode 100644 index fdead863f1..0000000000 --- a/src/agent/coverage-legacy/src/demangle.rs +++ /dev/null @@ -1,221 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. - -use anyhow::{format_err, Result}; - -#[derive(Clone, Copy, Debug)] -pub struct ItaniumDemangler { - options: cpp_demangle::DemangleOptions, -} - -impl ItaniumDemangler { - pub fn try_demangle(&self, raw: impl AsRef) -> Result { - let symbol = cpp_demangle::Symbol::new(raw.as_ref())?; - Ok(symbol.demangle(&self.options)?) - } -} - -impl Default for ItaniumDemangler { - fn default() -> Self { - let options = cpp_demangle::DemangleOptions::new() - .no_params() - .no_return_type(); - Self { options } - } -} - -#[derive(Clone, Copy, Debug)] -pub struct MsvcDemangler { - flags: msvc_demangler::DemangleFlags, -} - -impl MsvcDemangler { - pub fn try_demangle(&self, raw: impl AsRef) -> Result { - Ok(msvc_demangler::demangle(raw.as_ref(), self.flags)?) - } -} - -impl Default for MsvcDemangler { - fn default() -> Self { - // Equivalent to `undname 0x1000`. - let flags = msvc_demangler::DemangleFlags::NAME_ONLY; - Self { flags } - } -} - -#[derive(Clone, Copy, Debug, Default)] -pub struct RustcDemangler; - -impl RustcDemangler { - pub fn try_demangle(&self, raw: impl AsRef) -> Result { - let name = rustc_demangle::try_demangle(raw.as_ref()) - .map_err(|_| format_err!("unable to demangle rustc name"))?; - - // Alternate formatter discards trailing hash. - Ok(format!("{name:#}")) - } -} - -/// Demangler that tries to demangle a raw name against each known scheme. 
-#[derive(Clone, Copy, Debug, Default)] -pub struct Demangler { - itanium: ItaniumDemangler, - msvc: MsvcDemangler, - rustc: RustcDemangler, -} - -impl Demangler { - /// Try to demangle a raw name according to a set of known schemes. - /// - /// The following schemes are tried in-order: - /// 1. rustc - /// 2. Itanium - /// 3. MSVC - /// - /// The first scheme to provide some demangling is used. If the name does - /// not parse against any of the known schemes, return `None`. - pub fn demangle(&self, raw: impl AsRef) -> Option { - let raw = raw.as_ref(); - - // Try `rustc` demangling first. - // - // Ensures that if a name _also_ demangles against the Itanium scheme, - // we are sure to remove the hash suffix from the demangled name. - if let Ok(demangled) = self.rustc.try_demangle(raw) { - return Some(demangled); - } - - if let Ok(demangled) = self.itanium.try_demangle(raw) { - return Some(demangled); - } - - if let Ok(demangled) = self.msvc.try_demangle(raw) { - return Some(demangled); - } - - None - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_demangler_itanium_llvm() { - let test_cases = &[ - ( - "_ZN11__sanitizer20SizeClassAllocator64IN6__asan4AP64INS_21LocalAddressSpaceViewEEEE21ReleaseFreeMemoryToOSINS5_12MemoryMapperEEEvPjmmmPT_", - "__sanitizer::SizeClassAllocator64<__asan::AP64<__sanitizer::LocalAddressSpaceView> >::ReleaseFreeMemoryToOS<__sanitizer::SizeClassAllocator64<__asan::AP64<__sanitizer::LocalAddressSpaceView> >::MemoryMapper>", - ), - ( - "_ZN11__sanitizer14ThreadRegistry23FindThreadContextLockedEPFbPNS_17ThreadContextBaseEPvES3_", - "__sanitizer::ThreadRegistry::FindThreadContextLocked", - ), - ( - "_ZN7Greeter5GreetEi", - "Greeter::Greet", - ), - ( - "_ZN7Greeter5GreetEv", - "Greeter::Greet", - ), - ( - "_ZN7Greeter5GreetERNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE", - "Greeter::Greet", - ), - ( - "_ZN7NothingIPvE3NopES0_", - "Nothing::Nop", - ), - ( - "_ZN7NothingIiE3NopEi", - "Nothing::Nop", - ), - ( - 
"_ZN7NothingIRdE3NopES0_", - "Nothing::Nop", - ), - ]; - - let demangler = Demangler::default(); - - for (mangled, demangled) in test_cases { - let name = demangler - .demangle(mangled) - .unwrap_or_else(|| panic!("demangling error: {}", mangled)); - assert_eq!(&name, demangled); - } - - assert!(demangler.demangle("main").is_none()); - assert!(demangler.demangle("_some_function").is_none()); - } - - #[test] - fn test_demangler_msvc() { - let test_cases = &[ - ( - "?Greet@Greeter@@QEAAXXZ", - "Greeter::Greet", - ), - ( - "?Greet@Greeter@@QEAAXH@Z", - "Greeter::Greet", - ), - ( - "?Greet@Greeter@@QEAAXAEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@Z", - "Greeter::Greet", - ), - ( - "?Nop@?$Nothing@H@@QEAAXH@Z", - "Nothing::Nop", - ), - ( - "?Nop@?$Nothing@AEAN@@QEAAXAEAN@Z", - "Nothing::Nop", - ), - ( - "?Nop@?$Nothing@PEAX@@QEAAXPEAX@Z", - "Nothing::Nop", - ), - ]; - - let demangler = Demangler::default(); - - for (mangled, demangled) in test_cases { - let name = demangler - .demangle(mangled) - .unwrap_or_else(|| panic!("demangling error: {}", mangled)); - assert_eq!(&name, demangled); - } - - assert!(demangler.demangle("main").is_none()); - assert!(demangler.demangle("_some_function").is_none()); - } - - #[test] - fn test_demangler_rustc() { - let test_cases = &[ - ( - "_ZN3std2io5stdio9set_panic17hcf1e5c38cefca0deE", - "std::io::stdio::set_panic", - ), - ( - "_ZN4core3fmt3num53_$LT$impl$u20$core..fmt..LowerHex$u20$for$u20$i64$GT$3fmt17h7ebe6c0818892343E", - "core::fmt::num::::fmt", - ), - ]; - - let demangler = Demangler::default(); - - for (mangled, demangled) in test_cases { - let name = demangler - .demangle(mangled) - .unwrap_or_else(|| panic!("demangling error: {}", mangled)); - assert_eq!(&name, demangled); - } - - assert!(demangler.demangle("main").is_none()); - assert!(demangler.demangle("_some_function").is_none()); - } -} diff --git a/src/agent/coverage-legacy/src/disasm.rs b/src/agent/coverage-legacy/src/disasm.rs deleted file mode 
100644 index 746d35493f..0000000000 --- a/src/agent/coverage-legacy/src/disasm.rs +++ /dev/null @@ -1,122 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. - -use std::collections::BTreeSet; -use std::convert::TryInto; - -use anyhow::{bail, format_err, Context, Result}; -use iced_x86::{Decoder, DecoderOptions, Instruction}; - -use crate::code::{ModuleIndex, Symbol}; - -pub struct ModuleDisassembler<'a> { - module: &'a ModuleIndex, - data: &'a [u8], -} - -impl<'a> ModuleDisassembler<'a> { - pub fn new(module: &'a ModuleIndex, data: &'a [u8]) -> Result { - Ok(Self { module, data }) - } - - /// Find block entry points for every symbol in the module. - pub fn find_blocks(&self) -> BTreeSet { - let mut blocks = BTreeSet::new(); - - for symbol in self.module.symbols.iter() { - if let Err(err) = self.insert_symbol_blocks(&mut blocks, symbol) { - log::error!( - "error disassembling blocks for symbol, err = {}, symbol = {:x?}", - err, - symbol - ); - } - } - - blocks - } - - /// Find all entry points for blocks contained within the region of `symbol`. - fn insert_symbol_blocks(&self, blocks: &mut BTreeSet, symbol: &Symbol) -> Result<()> { - // Slice the symbol's instruction data from the module file data. - let data = if let Some(data) = self.data.get(symbol.file_range_usize()) { - data - } else { - bail!("data cannot contain file region for symbol"); - }; - - // Initialize a decoder for the current symbol. - let mut decoder = Decoder::new(64, data, DecoderOptions::NONE); - - // Compute the VA of the symbol, assuming preferred module base VA. - let va = self - .module - .base_va - .checked_add(symbol.image_offset) - .ok_or_else(|| format_err!("symbol image offset overflowed base VA"))?; - decoder.set_ip(va); - - // Function entry is a leader. 
- blocks.insert(symbol.image_offset.try_into()?); - - let mut inst = Instruction::default(); - while decoder.can_decode() { - decoder.decode_out(&mut inst); - - if let Some((target_va, conditional)) = branch_target(&inst) { - let offset = target_va - self.module.base_va; - - // The branch target is a leader, if it is intra-procedural. - if symbol.contains_image_offset(offset) { - blocks.insert(offset.try_into().context("ELF offset overflowed `u32`")?); - } - - // Only mark the fallthrough instruction as a leader if the branch is conditional. - // This will give an invalid basic block decomposition if the leaders we emit are - // used as delimiters. In particular, blocks that end with a `jmp` will be too - // large, and have an unconditional branch mid-block. - // - // However, we only care about the leaders as block entry points, so we can set - // software breakpoints. These maybe-unreachable leaders are a liability wrt - // mutating the running process' code, so we discard them for now. - if conditional { - // The next instruction is a leader, if it exists. - if decoder.can_decode() { - // We decoded the current instruction, so the decoder offset is - // set to the next instruction. - let next = decoder.ip(); - let next_offset = - if let Some(offset) = next.checked_sub(self.module.base_va) { - offset.try_into().context("ELF offset overflowed `u32`")? - } else { - anyhow::bail!("underflow converting ELF VA to offset") - }; - - blocks.insert(next_offset); - } - } - } - } - - Ok(()) - } -} - -// Returns the virtual address of a branch target, if present, with a flag that -// is true when the branch is conditional. 
-fn branch_target(inst: &Instruction) -> Option<(u64, bool)> { - use iced_x86::FlowControl; - - match inst.flow_control() { - FlowControl::ConditionalBranch => Some((inst.near_branch_target(), true)), - FlowControl::UnconditionalBranch => Some((inst.near_branch_target(), false)), - FlowControl::Call - | FlowControl::Exception - | FlowControl::IndirectBranch - | FlowControl::IndirectCall - | FlowControl::Interrupt - | FlowControl::Next - | FlowControl::Return - | FlowControl::XbeginXabortXend => None, - } -} diff --git a/src/agent/coverage-legacy/src/elf.rs b/src/agent/coverage-legacy/src/elf.rs deleted file mode 100644 index 07d4b654be..0000000000 --- a/src/agent/coverage-legacy/src/elf.rs +++ /dev/null @@ -1,280 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. - -use std::collections::BTreeSet; -use std::convert::{TryFrom, TryInto}; - -use anyhow::{format_err, Result}; -use goblin::elf::{ - program_header::PT_LOAD, section_header::SectionHeader, sym::STT_NOTYPE, Elf, Sym, -}; - -use crate::sancov::{SancovDelimiters, SancovInlineAccessScanner, SancovTable}; - -#[derive(Clone, Copy, Debug)] -pub struct ElfContext<'d, 'e> { - pub base: u64, - pub data: &'d [u8], - pub elf: &'e Elf<'e>, -} - -impl<'d, 'e> ElfContext<'d, 'e> { - pub fn new(data: &'d [u8], elf: &'e Elf<'e>) -> Result { - // Find the virtual address of the lowest loadable segment. - let base = elf - .program_headers - .iter() - .filter(|h| h.p_type == PT_LOAD) - .map(|h| h.p_vaddr) - .min() - .ok_or_else(|| format_err!("no loadable segments"))?; - - Ok(Self { base, data, elf }) - } - - pub fn try_symbol_name(&self, sym: &Sym) -> Result { - let name = self - .elf - .strtab - .get_at(sym.st_name) - .ok_or_else(|| format_err!("symbol index out of bounds: {}", sym.st_name))? - .to_owned(); - - Ok(name) - } - - /// Convert a virtual address to an offset into the module's backing file. 
- pub fn va_to_file_offset(&self, va: u64, section_index: Option) -> Result { - let section = self.try_find_section_for_va(va, section_index)?; - - // VA of mapped section. - let section_va = section.sh_addr; - - // Offset of `va` from the mapped section VA. - let va_section_offset = va - .checked_sub(section_va) - .ok_or_else(|| format_err!("underflow computing virtual offset from section"))?; - - // The value of `va_section_offset` is the same in-memory and on-disk. - // We calculated it using VAs, but we can apply it to the section's file - // offset to get the file offset of the converted VA. - let file_offset = section.sh_offset + va_section_offset; - - Ok(file_offset.try_into()?) - } - - /// Convert a virtual address to a module-relative virtual memory offset. - pub fn va_to_vm_offset(&self, va: u64) -> Result { - let offset: u32 = va - .checked_sub(self.base) - .ok_or_else(|| { - format_err!( - "underflow computing image offset: va = {:?}, base = {:x}", - va, - self.base, - ) - })? - .try_into()?; - - Ok(offset) - } - - /// Try to find the section that contains the VA, if any. - /// - /// If passed an optional index to a section header which should contain the - /// VA, try to resolve it and check the VM bounds. - fn try_find_section_for_va(&self, va: u64, index: Option) -> Result<&SectionHeader> { - // Convert for use with `SectionHeader::vm_range()`. - let va: usize = va.try_into()?; - - let section = if let Some(index) = index { - // If given an index, return the denoted section if it exists and contains the VA. - let section = self - .elf - .section_headers - .get(index) - .ok_or_else(|| format_err!("section index out of bounds: {}", index))?; - - if !section.vm_range().contains(&va) { - anyhow::bail!("VA not in section range: {:x}", va); - } - - section - } else { - // If not given an index, try to find a containing section. 
- self.elf - .section_headers - .iter() - .find(|s| s.vm_range().contains(&va)) - .ok_or_else(|| format_err!("VA not contained in any section: {:x}", va))? - }; - - Ok(section) - } -} - -pub struct ElfSancovBasicBlockProvider<'d, 'e> { - ctx: ElfContext<'d, 'e>, - check_pc_table: bool, -} - -impl<'d, 'e> ElfSancovBasicBlockProvider<'d, 'e> { - pub fn new(ctx: ElfContext<'d, 'e>) -> Self { - let check_pc_table = true; - Self { - ctx, - check_pc_table, - } - } - - pub fn set_check_pc_table(&mut self, check: bool) { - self.check_pc_table = check; - } - - pub fn provide(&mut self) -> Result> { - let mut visitor = DelimiterVisitor::new(self.ctx); - - for sym in self.ctx.elf.syms.iter() { - if let STT_NOTYPE = sym.st_type() { - visitor.visit_data_symbol(sym)?; - } - } - - if self.check_pc_table { - if let Some(pcs_table) = visitor.delimiters.pcs_table(false) { - if let Ok(blocks) = self.provide_from_pcs_table(pcs_table) { - return Ok(blocks); - } - } - } - - if let Some(inline_table) = visitor.delimiters.inline_table(false) { - return self.provide_from_inline_table(inline_table); - } - - anyhow::bail!("unable to find Sancov table") - } - - pub fn provide_from_inline_table( - &mut self, - inline_table: SancovTable, - ) -> Result> { - let mut visitor = InlineAccessVisitor::new(inline_table, self.ctx); - - for sym in self.ctx.elf.syms.iter() { - visitor.visit_symbol(&sym)?; - } - - Ok(visitor.scanner.offsets) - } - - pub fn provide_from_pcs_table(&mut self, pcs_table: SancovTable) -> Result> { - let vm_offset: u64 = pcs_table.offset.into(); - let va = self.ctx.base + vm_offset; - let file_offset = self.ctx.va_to_file_offset(va, None)?; - let file_range = file_offset..(file_offset + pcs_table.size); - - let table_data = self - .ctx - .data - .get(file_range) - .ok_or_else(|| format_err!("Sancov table data out of file range"))?; - - // Assumes x86-64, `sizeof(uintptr_t) == 8`. 
- // - // Should check if `e_ident[EI_CLASS]` is `ELFCLASS32` or `ELFCLASS64`, - // or equivalently, `elf.is_64`. - if table_data.len() % 16 != 0 { - anyhow::bail!("invalid PC table size"); - } - - let mut pcs = BTreeSet::default(); - - // Each entry is a struct with 2 `uintptr_t` values: a PC, then a flag. - // We only want the PC, so start at 0 (the default) and step by 2 to - // skip the flags. - for chunk in table_data.chunks(8).step_by(2) { - let le: [u8; 8] = chunk.try_into()?; - let pc = u64::from_le_bytes(le); - let pc_vm_offset = self.ctx.va_to_vm_offset(pc)?; - pcs.insert(pc_vm_offset); - } - - Ok(pcs) - } -} - -struct DelimiterVisitor<'d, 'e> { - ctx: ElfContext<'d, 'e>, - delimiters: SancovDelimiters, -} - -impl<'d, 'e> DelimiterVisitor<'d, 'e> { - pub fn new(ctx: ElfContext<'d, 'e>) -> Self { - let delimiters = SancovDelimiters::default(); - - Self { ctx, delimiters } - } - - pub fn visit_data_symbol(&mut self, sym: Sym) -> Result<()> { - let va = sym.st_value; - - if va == 0 { - return Ok(()); - } - - let offset = self.ctx.va_to_vm_offset(va)?; - let name = self.ctx.try_symbol_name(&sym)?; - - if let Ok(delimiter) = name.parse() { - self.delimiters.insert(delimiter, offset); - } - - Ok(()) - } -} - -pub struct InlineAccessVisitor<'d, 'e> { - ctx: ElfContext<'d, 'e>, - scanner: SancovInlineAccessScanner, -} - -impl<'d, 'e> InlineAccessVisitor<'d, 'e> { - pub fn new(table: SancovTable, ctx: ElfContext<'d, 'e>) -> Self { - let scanner = SancovInlineAccessScanner::new(ctx.base, table); - - Self { ctx, scanner } - } - - pub fn visit_symbol(&mut self, sym: &Sym) -> Result<()> { - if sym.st_size == 0 { - return Ok(()); - } - - if !sym.is_function() { - return Ok(()); - } - - if sym.is_import() { - return Ok(()); - } - - let va = sym.st_value; - - let file_range = { - let index = sym.st_shndx.into(); - let lo: usize = self.ctx.va_to_file_offset(va, index)?; - let hi: usize = lo + usize::try_from(sym.st_size)?; - lo..hi - }; - let data = self - .ctx - .data - 
.get(file_range) - .ok_or_else(|| format_err!("procedure out of data bounds"))?; - - self.scanner.scan(data, va)?; - - Ok(()) - } -} diff --git a/src/agent/coverage-legacy/src/filter.rs b/src/agent/coverage-legacy/src/filter.rs deleted file mode 100644 index 70e09a49a4..0000000000 --- a/src/agent/coverage-legacy/src/filter.rs +++ /dev/null @@ -1,161 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. - -use anyhow::Result; -use regex::RegexSet; -use serde::{Deserialize, Serialize}; - -#[derive(Clone, Debug, Deserialize, Serialize)] -#[serde(rename_all = "snake_case")] -pub enum Filter { - Include(Include), - Exclude(Exclude), -} - -impl Filter { - pub fn includes(&self, name: impl AsRef) -> bool { - match self { - Self::Include(f) => f.includes(name), - Self::Exclude(f) => f.includes(name), - } - } -} - -impl Default for Filter { - fn default() -> Self { - Self::Include(Include::all()) - } -} - -/// Filter that includes only those names which match a specific pattern. -#[derive(Clone, Debug, Deserialize, Serialize)] -#[serde(transparent)] -pub struct Include { - #[serde(with = "self::regex_set")] - regexes: RegexSet, -} - -impl Include { - /// Build a filter that includes only the given patterns. - /// - /// If `exprs` is empty, then no names will be included. - pub fn new(exprs: &[impl AsRef]) -> Result { - let regexes = RegexSet::new(exprs)?; - Ok(Self { regexes }) - } - - /// Build a filter that includes all names. - pub fn all() -> Self { - Self::new(&[".*"]).expect("error constructing filter from static, valid regex") - } - - /// Returns `true` if `name` is included. - pub fn includes(&self, name: impl AsRef) -> bool { - self.regexes.is_match(name.as_ref()) - } -} - -impl Default for Include { - fn default() -> Self { - Self::all() - } -} - -impl From for Filter { - fn from(include: Include) -> Self { - Self::Include(include) - } -} - -/// Filter that excludes only those names which match a specific pattern. 
-#[derive(Clone, Debug, Deserialize, Serialize)] -#[serde(transparent)] -pub struct Exclude { - #[serde(with = "self::regex_set")] - regexes: RegexSet, -} - -impl Exclude { - /// Build a filter that excludes only the given patterns. - /// - /// If `exprs` is empty, then no names will be denied. - pub fn new(exprs: &[impl AsRef]) -> Result { - let regexes = RegexSet::new(exprs)?; - Ok(Self { regexes }) - } - - /// Build a filter that includes all names. - pub fn none() -> Self { - let empty: &[&str] = &[]; - Self::new(empty).expect("error constructing filter from static, empty regex set") - } - - /// Returns `true` if `name` is included. - pub fn includes(&self, name: impl AsRef) -> bool { - !self.regexes.is_match(name.as_ref()) - } -} - -impl Default for Exclude { - fn default() -> Self { - Self::none() - } -} - -impl From for Filter { - fn from(exclude: Exclude) -> Self { - Self::Exclude(exclude) - } -} - -mod regex_set { - use std::fmt; - - use regex::RegexSet; - use serde::de::{self, Deserializer, SeqAccess, Visitor}; - use serde::ser::{SerializeSeq, Serializer}; - - pub fn serialize(regexes: &RegexSet, ser: S) -> Result - where - S: Serializer, - { - let patterns = regexes.patterns(); - let mut seq = ser.serialize_seq(Some(patterns.len()))?; - for p in patterns { - seq.serialize_element(p)?; - } - seq.end() - } - - struct RegexSetVisitor; - - impl<'d> Visitor<'d> for RegexSetVisitor { - type Value = RegexSet; - - fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "a vec of strings which compile as regexes") - } - - fn visit_seq(self, mut seq: A) -> Result - where - A: SeqAccess<'d>, - { - let mut patterns = Vec::::new(); - - while let Some(p) = seq.next_element()? 
{ - patterns.push(p); - } - - let regexes = RegexSet::new(patterns).map_err(de::Error::custom)?; - - Ok(regexes) - } - } - - pub fn deserialize<'d, D>(de: D) -> Result - where - D: Deserializer<'d>, - { - de.deserialize_seq(RegexSetVisitor) - } -} diff --git a/src/agent/coverage-legacy/src/intel.rs b/src/agent/coverage-legacy/src/intel.rs deleted file mode 100644 index dbca88ef77..0000000000 --- a/src/agent/coverage-legacy/src/intel.rs +++ /dev/null @@ -1,70 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. - -use anyhow::{Context, Result}; -use fixedbitset::FixedBitSet; -use iced_x86::{Decoder, DecoderOptions, FlowControl, Instruction, OpKind}; - -use crate::pe::TryInsert; - -fn process_near_branch(instruction: &Instruction, blocks: &mut FixedBitSet) -> Result<()> { - match instruction.op0_kind() { - OpKind::NearBranch16 => {} - OpKind::NearBranch32 => {} - OpKind::NearBranch64 => { - // Note we do not check if the branch takes us to another function, e.g. - // with a tail call. - let off = instruction.near_branch_target() as usize; - blocks - .try_insert(off) - .context("inserting block for near branch target")?; - } - OpKind::FarBranch16 => {} - OpKind::FarBranch32 => {} - _ => {} - } - - Ok(()) -} - -pub fn find_blocks( - bitness: u32, - bytes: &[u8], - func_rva: u32, - blocks: &mut FixedBitSet, -) -> Result<()> { - // We *could* maybe pass `DecoderOptions::AMD_BRANCHES | DecoderOptions::JMPE` because - // we only care about control flow here, but it's not clear we'll ever see those instructions - // and we don't need precise coverage so it doesn't matter too much. 
- let mut decoder = Decoder::new(bitness, bytes, DecoderOptions::NONE); - decoder.set_ip(func_rva as u64); - - let mut instruction = Instruction::default(); - while decoder.can_decode() { - decoder.decode_out(&mut instruction); - - match instruction.flow_control() { - FlowControl::Next => {} - FlowControl::ConditionalBranch => { - process_near_branch(&instruction, blocks)?; - - let off = instruction.next_ip() as usize; - blocks - .try_insert(off) - .context("inserting block for next PC after conditional branch")?; - } - FlowControl::UnconditionalBranch => { - process_near_branch(&instruction, blocks)?; - } - FlowControl::IndirectBranch => {} - FlowControl::Return => {} - FlowControl::Call => {} - FlowControl::IndirectCall => {} - FlowControl::Interrupt => {} - FlowControl::XbeginXabortXend => {} - FlowControl::Exception => {} - } - } - - Ok(()) -} diff --git a/src/agent/coverage-legacy/src/lib.rs b/src/agent/coverage-legacy/src/lib.rs deleted file mode 100644 index d87e5c42c1..0000000000 --- a/src/agent/coverage-legacy/src/lib.rs +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. -#![allow(clippy::as_conversions)] -#![allow(clippy::new_without_default)] - -#[cfg(target_os = "windows")] -mod intel; - -#[cfg(target_os = "windows")] -pub mod pdb; - -#[cfg(target_os = "windows")] -pub mod pe; - -#[cfg(target_os = "linux")] -pub mod elf; - -pub mod block; -pub mod cache; -pub mod cobertura; -pub mod code; -pub mod debuginfo; -pub mod demangle; -pub mod report; -pub mod sancov; -pub mod source; - -#[cfg(target_os = "linux")] -pub mod disasm; - -pub mod filter; -mod region; - -#[cfg(test)] -mod test; diff --git a/src/agent/coverage-legacy/src/pdb.rs b/src/agent/coverage-legacy/src/pdb.rs deleted file mode 100644 index a2a439e9fc..0000000000 --- a/src/agent/coverage-legacy/src/pdb.rs +++ /dev/null @@ -1,121 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. 
- -use std::{ - ffi::CStr, - fs, - path::{Path, PathBuf}, -}; - -use anyhow::Result; -use debugger::dbghelp::DebugHelpGuard; -use goblin::pe::{debug::DebugData, PE}; -use winapi::um::{dbghelp::SYMOPT_EXACT_SYMBOLS, winnt::HANDLE}; - -// This is a fallback pseudo-handle used for interacting with dbghelp. -// -// We want to avoid `(HANDLE) -1`, because that pseudo-handle is reserved for -// the current process. Reusing it is documented as causing unexpected dbghelp -// behavior when debugging other processes (which we typically will be). -// -// By picking some other very large value, we avoid collisions with handles that -// are concretely either table indices or virtual addresses. -// -// See: https://docs.microsoft.com/en-us/windows/win32/api/dbghelp/nf-dbghelp-syminitializew -const PSEUDO_HANDLE: HANDLE = -2i64 as _; - -pub fn find_pdb_path( - pe_path: &Path, - pe: &PE, - target_handle: Option, -) -> Result> { - let cv = if let Some(DebugData { - image_debug_directory: _, - codeview_pdb70_debug_info: Some(cv), - }) = pe.debug_data - { - cv - } else { - anyhow::bail!("PE missing Codeview PDB debug info: {}", pe_path.display(),) - }; - - let cv_filename = CStr::from_bytes_with_nul(cv.filename)?.to_str()?; - - // This field is named `filename`, but it may be an absolute path. - // The callee `find_pdb_file_in_path()` handles either. - let cv_filename = Path::new(cv_filename); - - // If the PE-specified PDB file exists on disk, use that. - if let Ok(metadata) = fs::metadata(cv_filename) { - if metadata.is_file() { - return Ok(Some(cv_filename.to_owned())); - } - } - - // If we have one, use the the process handle for an existing debug - let handle = target_handle.unwrap_or(PSEUDO_HANDLE); - - let dbghelp = debugger::dbghelp::lock()?; - - // If a target handle was provided, we assume the caller initialized the - // dbghelp symbol handler, and will clean up after itself. 
- // - // Otherwise, initialize a symbol handler with our own pseudo-path, and use - // a drop guard to ensure we don't leak resources. - let _cleanup = if target_handle.is_some() { - None - } else { - dbghelp.sym_initialize(handle)?; - Some(DbgHelpCleanupGuard::new(&dbghelp, handle)) - }; - - // Enable signature and age checking. - let options = dbghelp.sym_get_options(); - dbghelp.sym_set_options(options | SYMOPT_EXACT_SYMBOLS); - - let mut search_path = dbghelp.sym_get_search_path(handle)?; - - log::debug!("initial search path = {:?}", search_path); - - // Try to add the directory of the PE to the PDB search path. - // - // This may be redundant, and should always succeed. - if let Some(pe_dir) = pe_path.parent() { - log::debug!("pushing PE dir to search path = {:?}", pe_dir.display()); - - search_path.push(";"); - search_path.push(pe_dir); - } else { - log::warn!("PE path has no parent dir: {}", pe_path.display()); - } - - dbghelp.sym_set_search_path(handle, search_path)?; - - let pdb_path = - dbghelp.find_pdb_file_in_path(handle, cv_filename, cv.codeview_signature, cv.age)?; - - Ok(pdb_path) -} - -/// On drop, deallocates all resources associated with its process handle. -struct DbgHelpCleanupGuard<'d> { - dbghelp: &'d DebugHelpGuard, - process_handle: HANDLE, -} - -impl<'d> DbgHelpCleanupGuard<'d> { - pub fn new(dbghelp: &'d DebugHelpGuard, process_handle: HANDLE) -> Self { - Self { - dbghelp, - process_handle, - } - } -} - -impl<'d> Drop for DbgHelpCleanupGuard<'d> { - fn drop(&mut self) { - if let Err(err) = self.dbghelp.sym_cleanup(self.process_handle) { - log::error!("error cleaning up symbol handler: {:?}", err); - } - } -} diff --git a/src/agent/coverage-legacy/src/pe.rs b/src/agent/coverage-legacy/src/pe.rs deleted file mode 100644 index 2617a2f656..0000000000 --- a/src/agent/coverage-legacy/src/pe.rs +++ /dev/null @@ -1,350 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. 
- -#![allow(clippy::manual_swap)] - -use std::{fs::File, path::Path}; - -use anyhow::{bail, Context, Result}; -use fixedbitset::FixedBitSet; -use goblin::pe::PE; -use memmap2::Mmap; -use pdb::{ - AddressMap, FallibleIterator, PdbInternalSectionOffset, ProcedureSymbol, TypeIndex, PDB, -}; -use winapi::um::winnt::{HANDLE, IMAGE_FILE_MACHINE_AMD64, IMAGE_FILE_MACHINE_I386}; - -use crate::intel; - -struct JumpTableData { - pub offset: PdbInternalSectionOffset, - pub labels: Vec, -} - -impl JumpTableData { - pub fn new(offset: PdbInternalSectionOffset) -> Self { - Self { - offset, - labels: vec![], - } - } -} - -struct ProcSymInfo { - pub name: String, - pub offset: PdbInternalSectionOffset, - pub code_len: u32, - pub jump_tables: Vec, - pub extra_labels: Vec, -} - -impl ProcSymInfo { - pub fn new( - name: String, - offset: PdbInternalSectionOffset, - code_len: u32, - jump_tables: Vec, - extra_labels: Vec, - ) -> Self { - Self { - name, - offset, - code_len, - jump_tables, - extra_labels, - } - } -} - -fn offset_within_func(offset: PdbInternalSectionOffset, proc: &ProcedureSymbol) -> bool { - offset.section == proc.offset.section - && offset.offset >= proc.offset.offset - && offset.offset < (proc.offset.offset + proc.len) -} - -fn collect_func_sym_info( - symbols: &mut pdb::SymbolIter<'_>, - proc: ProcedureSymbol, -) -> Result { - let mut jump_tables = vec![]; - let mut extra_labels = vec![]; - while let Some(symbol) = symbols.next()? { - // Symbols are scoped with `end` marking the last symbol in the scope of the function. - if symbol.index() == proc.end { - break; - } - - match symbol.parse() { - Ok(pdb::SymbolData::Data(data)) => { - // Local data *might* be a jump table if it's in the same section as - // the function. For extra paranoia, we also check that there is no type - // as that is what VC++ generates. LLVM does not generate debug symbols for - // jump tables. 
- if offset_within_func(data.offset, &proc) && data.type_index == TypeIndex(0) { - jump_tables.push(JumpTableData::new(data.offset)); - } - } - Ok(pdb::SymbolData::Label(label)) => { - if offset_within_func(label.offset, &proc) { - if let Some(jump_table) = jump_tables.last_mut() { - jump_table.labels.push(label.offset); - } else { - // Maybe not possible to get here, and maybe a bad idea for VC++ - // because the code length would include this label, - // but could be useful if LLVM generates labels but no L_DATA32 record. - extra_labels.push(label.offset); - } - } - } - Ok(_) - | Err(pdb::Error::UnimplementedFeature(_)) - | Err(pdb::Error::UnimplementedSymbolKind(_)) => {} - Err(err) => { - anyhow::bail!("Error reading symbols: {}", err); - } - } - } - - let result = ProcSymInfo::new( - proc.name.to_string().to_string(), - proc.offset, - proc.len, - jump_tables, - extra_labels, - ); - Ok(result) -} - -fn collect_proc_symbols(symbols: &mut pdb::SymbolIter<'_>) -> Result> { - let mut result = vec![]; - - while let Some(symbol) = symbols.next()? { - match symbol.parse() { - Ok(pdb::SymbolData::Procedure(proc)) => { - // Collect everything we need for safe disassembly of the function. 
- result.push(collect_func_sym_info(symbols, proc)?); - } - Ok(_) - | Err(pdb::Error::UnimplementedFeature(_)) - | Err(pdb::Error::UnimplementedSymbolKind(_)) => {} - Err(err) => { - anyhow::bail!("Error reading symbols: {}", err); - } - } - } - - Ok(result) -} - -fn find_blocks( - proc_data: &[ProcSymInfo], - blocks: &mut FixedBitSet, - address_map: &AddressMap, - pe: &PE, - data: &[u8], - functions_only: bool, -) -> Result<()> { - let file_alignment = pe - .header - .optional_header - .unwrap() - .windows_fields - .file_alignment; - let machine = pe.header.coff_header.machine; - let bitness = match machine { - IMAGE_FILE_MACHINE_I386 => 32, - IMAGE_FILE_MACHINE_AMD64 => 64, - _ => anyhow::bail!("Unsupported architecture {}", machine), - }; - - let parse_options = goblin::pe::options::ParseOptions::default(); - - for proc in proc_data { - if let Some(rva) = proc.offset.to_rva(address_map) { - blocks - .try_insert(rva.0 as usize) - .context("inserting block for procedure offset")?; - - if functions_only { - continue; - } - - if let Some(file_offset) = goblin::pe::utils::find_offset( - rva.0 as usize, - &pe.sections, - file_alignment, - &parse_options, - ) { - // VC++ includes jump tables with the code length which we must exclude - // from disassembly. We use the minimum address of a jump table since - // the tables are placed consecutively after the actual code. - // - // LLVM 9 **does not** include debug info for jump tables, but conveniently - // does not include the jump tables in the code length. 
- let mut code_len = proc.code_len; - - for table in &proc.jump_tables { - if table.offset.section == proc.offset.section - && table.offset.offset > proc.offset.offset - && (proc.offset.offset + code_len) > table.offset.offset - { - code_len = table.offset.offset - proc.offset.offset; - } - - for label in &table.labels { - if let Some(rva) = label.to_rva(address_map) { - blocks - .try_insert(rva.0 as usize) - .context("inserting block for offset from label")?; - } - } - } - - for label in &proc.extra_labels { - if let Some(rva) = label.to_rva(address_map) { - blocks - .try_insert(rva.0 as usize) - .context("inserting block for offset from extra labels")?; - } - } - - log::trace!( - "analyzing func: {} rva: 0x{:x} file_offset: 0x{:x}", - &proc.name, - rva.0, - file_offset - ); - - intel::find_blocks( - bitness, - &data[file_offset..file_offset + (code_len as usize)], - rva.0, - blocks, - )?; - } - } - } - - Ok(()) -} - -pub fn process_module( - pe_path: impl AsRef, - data: &[u8], - pe: &PE, - functions_only: bool, - target_handle: Option, -) -> Result { - let pdb_path = crate::pdb::find_pdb_path(pe_path.as_ref(), pe, target_handle) - .with_context(|| format!("searching for PDB for PE: {}", pe_path.as_ref().display()))?; - - if let Some(pdb_path) = pdb_path { - log::info!("found PDB: {}", pdb_path.display()); - process_pdb(data, pe, functions_only, &pdb_path) - .with_context(|| format!("processing PDB: {}", pdb_path.display())) - } else { - anyhow::bail!("PDB not found for PE: {}", pe_path.as_ref().display()) - } -} - -fn process_pdb(data: &[u8], pe: &PE, functions_only: bool, pdb_path: &Path) -> Result { - let pdb_file = File::open(pdb_path).context("opening PDB")?; - let mut pdb = PDB::open(pdb_file).context("parsing PDB")?; - - let address_map = pdb.address_map()?; - let mut blocks = FixedBitSet::with_capacity(data.len()); - let proc_sym_info = collect_proc_symbols(&mut pdb.global_symbols()?.iter())?; - - find_blocks( - &proc_sym_info[..], - &mut blocks, - 
&address_map, - pe, - data, - functions_only, - )?; - - // Modules in the PDB correspond to object files. - let dbi = pdb.debug_information()?; - let mut modules = dbi.modules()?; - while let Some(module) = modules.next()? { - if let Some(info) = pdb.module_info(&module)? { - let proc_sym_info = collect_proc_symbols(&mut info.symbols()?)?; - find_blocks( - &proc_sym_info[..], - &mut blocks, - &address_map, - pe, - data, - functions_only, - )?; - } - } - - Ok(blocks) -} - -pub fn process_image( - path: impl AsRef, - functions_only: bool, - handle: Option, -) -> Result { - let file = File::open(path.as_ref())?; - let mmap = unsafe { Mmap::map(&file)? }; - let pe = PE::parse(&mmap)?; - - process_module(path, &mmap, &pe, functions_only, handle) -} - -pub(crate) trait TryInsert { - fn try_insert(&mut self, bit: usize) -> Result<()>; -} - -impl TryInsert for FixedBitSet { - fn try_insert(&mut self, bit: usize) -> Result<()> { - if bit < self.len() { - self.insert(bit); - } else { - bail!("bit index {} exceeds bitset length {}", bit, self.len()) - } - - Ok(()) - } -} - -#[cfg(test)] -mod tests { - use anyhow::Result; - use fixedbitset::FixedBitSet; - - use super::TryInsert; - - #[test] - fn test_fixedbitset_try_insert() -> Result<()> { - let capacity = 8; - let in_bounds = 4; - let out_of_bounds = 123; - - let mut bitset = FixedBitSet::with_capacity(capacity); - - // Inserts when in-bounds. - assert!(!bitset.contains(0)); - bitset.try_insert(0)?; - assert!(bitset.contains(0)); - - assert!(!bitset.contains(in_bounds)); - bitset.try_insert(in_bounds)?; - assert!(bitset.contains(in_bounds)); - - // Errors when out of bounds. 
- assert!(!bitset.contains(capacity)); - assert!(bitset.try_insert(capacity).is_err()); - assert!(!bitset.contains(capacity)); - - assert!(!bitset.contains(out_of_bounds)); - assert!(bitset.try_insert(out_of_bounds).is_err()); - assert!(!bitset.contains(out_of_bounds)); - - Ok(()) - } -} diff --git a/src/agent/coverage-legacy/src/region.rs b/src/agent/coverage-legacy/src/region.rs deleted file mode 100644 index 7c612ef27d..0000000000 --- a/src/agent/coverage-legacy/src/region.rs +++ /dev/null @@ -1,128 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. - -use std::collections::BTreeMap; -use std::fmt::Debug; -use std::ops::Range; - -use serde::{Deserialize, Serialize}; - -/// A non-overlapping set of regions of program data. -#[derive(Clone, Debug, Deserialize, Eq, Ord, PartialEq, PartialOrd, Serialize)] -#[serde(transparent)] -pub struct RegionIndex { - regions: BTreeMap, -} - -// `Default` impl is defined even when `R: !Default`. -impl Default for RegionIndex { - fn default() -> Self { - let regions = BTreeMap::default(); - - Self { regions } - } -} - -impl RegionIndex -where - R: Region + Debug, -{ - pub fn iter(&self) -> impl Iterator { - self.regions.values() - } - - /// Find the region that contains `pos`, if any. - /// - /// Regions are non-overlapping, so if one exists, it is unique. - pub fn find(&self, pos: u64) -> Option<&R> { - // The highest base for a region that contains `pos` is exactly `pos`. Starting - // there, iterate down over region bases until we find one whose span contains - // `pos`. If a candidate region (exclusive) end is not greater than `pos`, we can - // stop, since our iteration is ordered and decreasing. - for (_base, region) in self.regions.range(..=pos).rev() { - let range = region.range(); - - if range.contains(&pos) { - return Some(region); - } - - // When we see a candidate region that ends below `pos`, we are done. 
Since we - // maintain the invariant that regions do not overlap, all pending regions are - // below the current region, and so no other region can possibly contain `pos`. - // - // Recall that `range.end` is exclusive, so the case `end == pos` means that - // the region ends 1 byte before `pos`. - if range.end <= pos { - return None; - } - } - - None - } - - /// Attempt to insert a new region into the index. - /// - /// The region is always inserted unless it would intersect an existing - /// entry. Returns `true` if inserted, `false` otherwise. - pub fn insert(&mut self, region: R) -> bool { - if let Some(existing) = self.find(region.base()) { - log::debug!( - "existing region contains start of new region: {:x?}", - existing - ); - return false; - } - - if let Some(existing) = self.find(region.last()) { - log::debug!( - "existing region contains end of new region: {:x?}", - existing - ); - return false; - } - - self.regions.insert(region.base(), region); - - true - } - - /// Remove the region based at `base`, if it exists. - pub fn remove(&mut self, base: u64) -> Option { - self.regions.remove(&base) - } -} - -/// A non-empty region of program data, in-memory or on-disk. -/// -/// Requirements: -/// - `size` must be nonzero -/// - `range` must be bounded and nonempty -pub trait Region { - /// Return the base of the region, which must agree with the inclusive range start. - fn base(&self) -> u64; - - /// Return the size of the region in bytes. - fn size(&self) -> u64; - - /// Return the last byte position contained in the region. - fn last(&self) -> u64 { - // This is the exclusive upper bound, and not contained in the region. - let end = self.base() + self.size(); - - // We require `size()` is at least 1, so we can decrement and stay in the region - // bounds. In particular, we will not return a value less than `base` or underflow - // if `base` is 0. - end - 1 - } - - /// Return a `Range` object that describes the region positions. 
- fn range(&self) -> Range { - // Inclusive lower bound. - let lo = self.base(); - - // Exclusive upper bound. - let hi = lo + self.size(); - - lo..hi - } -} diff --git a/src/agent/coverage-legacy/src/report.rs b/src/agent/coverage-legacy/src/report.rs deleted file mode 100644 index d4a93ad764..0000000000 --- a/src/agent/coverage-legacy/src/report.rs +++ /dev/null @@ -1,199 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. - -use serde::{Deserialize, Serialize}; - -/// Generic container for a code coverage report. -/// -/// Coverage is reported as a sequence of module coverage entries, which are -/// generic in a coverage type `C` and a metadata type `M`. -#[derive(Clone, Debug, Default, Deserialize, PartialEq, Eq, Serialize)] -#[serde(transparent)] -pub struct CoverageReport { - /// Coverage data for each module. - pub entries: Vec>, -} - -/// A generic entry in a code coverage report. -/// -/// `C` is the coverage type. It should have a field whose value is a map or -/// sequence of instrumented sites and associated counters. -/// -/// `M` is the metadata type. It should include additional data about the module -/// itself. It enables tracking provenance and disambiguating modules when the -/// `module` field is insufficient. Examples: module file checksums, process -/// identifiers. If not desired, it may be set to `()`. -/// -/// The types `C` and `M` must be structs with named fields, and should at least -/// implement `Serialize` and `Deserialize`. -/// -/// Warning: `serde` allows duplicate keys. If `M` and `C` share field names as -/// structs, then the serialized entry will have duplicate keys. -#[derive(Clone, Debug, Default, Deserialize, PartialEq, Eq, Serialize)] -pub struct CoverageReportEntry { - /// Path or name of the module. - pub module: String, - - /// Metadata to identify or contextualize the module. - #[serde(flatten)] - pub metadata: M, - - /// Coverage data for the module. 
- #[serde(flatten)] - pub coverage: C, -} - -#[cfg(test)] -mod tests { - use anyhow::Result; - use serde_json::json; - - use crate::test::module_path; - - use super::*; - - #[derive(Debug, Deserialize, Eq, PartialEq, Serialize)] - struct Metadata { - checksum: String, - pid: u64, - } - - #[derive(Debug, Deserialize, Eq, PartialEq, Serialize)] - struct Edge { - edges: Vec, - } - - #[derive(Debug, Deserialize, Eq, PartialEq, Serialize)] - struct EdgeCov { - src: u32, - dst: u32, - count: u32, - } - - // Example of using `CoverageReport` for alternative coverage types. - type EdgeCoverageReport = CoverageReport; - - #[test] - fn test_coverage_report() -> Result<()> { - let main_exe = module_path("/onefuzz/main.exe")?; - let some_dll = module_path("/common/some.dll")?; - - let text = serde_json::to_string(&json!([ - { - "module": some_dll, - "checksum": "5feceb66", - "pid": 123, - "edges": [ - { "src": 10, "dst": 20, "count": 0 }, - { "src": 10, "dst": 30, "count": 1 }, - { "src": 30, "dst": 40, "count": 1 }, - ], - }, - { - "module": some_dll, - "checksum": "ffc86f38", - "pid": 456, - "edges": [ - { "src": 100, "dst": 200, "count": 1 }, - { "src": 200, "dst": 300, "count": 0 }, - { "src": 300, "dst": 400, "count": 0 }, - ], - }, - { - "module": main_exe, - "checksum": "d952786c", - "pid": 123, - "edges": [ - { "src": 1000, "dst": 2000, "count": 1 }, - { "src": 2000, "dst": 3000, "count": 0 }, - ], - }, - ]))?; - - let report = EdgeCoverageReport { - entries: vec![ - CoverageReportEntry { - module: some_dll.to_string(), - metadata: Metadata { - checksum: "5feceb66".into(), - pid: 123, - }, - coverage: Edge { - edges: vec![ - EdgeCov { - src: 10, - dst: 20, - count: 0, - }, - EdgeCov { - src: 10, - dst: 30, - count: 1, - }, - EdgeCov { - src: 30, - dst: 40, - count: 1, - }, - ], - }, - }, - CoverageReportEntry { - module: some_dll.to_string(), - metadata: Metadata { - checksum: "ffc86f38".into(), - pid: 456, - }, - coverage: Edge { - edges: vec![ - EdgeCov { - src: 100, 
- dst: 200, - count: 1, - }, - EdgeCov { - src: 200, - dst: 300, - count: 0, - }, - EdgeCov { - src: 300, - dst: 400, - count: 0, - }, - ], - }, - }, - CoverageReportEntry { - module: main_exe.to_string(), - metadata: Metadata { - checksum: "d952786c".into(), - pid: 123, - }, - coverage: Edge { - edges: vec![ - EdgeCov { - src: 1000, - dst: 2000, - count: 1, - }, - EdgeCov { - src: 2000, - dst: 3000, - count: 0, - }, - ], - }, - }, - ], - }; - - let ser = serde_json::to_string(&report)?; - assert_eq!(ser, text); - - let de: EdgeCoverageReport = serde_json::from_str(&text)?; - assert_eq!(de, report); - - Ok(()) - } -} diff --git a/src/agent/coverage-legacy/src/sancov.rs b/src/agent/coverage-legacy/src/sancov.rs deleted file mode 100644 index 2d9e247df0..0000000000 --- a/src/agent/coverage-legacy/src/sancov.rs +++ /dev/null @@ -1,446 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. - -use std::collections::BTreeSet; -use std::convert::TryInto; - -use anyhow::{format_err, Result}; -use iced_x86::{Decoder, DecoderOptions, Instruction, Mnemonic, OpKind, Register}; - -/// Size of padding inserted (on Window) between `__start_` delimiter symbols -/// and the first entry of the delimited table's array. -/// -/// To find the true start offset of the table, add this to the symbol value. -const DELIMITER_START_PADDING: u32 = 8; - -#[derive(Default)] -pub struct SancovDelimiters { - llvm_bools_start: Option, - llvm_bools_stop: Option, - llvm_counters_start: Option, - llvm_counters_stop: Option, - llvm_pcs_start: Option, - llvm_pcs_stop: Option, - - msvc_bools_start: Option, - msvc_bools_stop: Option, - msvc_counters_start: Option, - msvc_counters_stop: Option, - msvc_pcs_start: Option, - msvc_pcs_stop: Option, - msvc_preview_counters_start: Option, - msvc_preview_counters_stop: Option, -} - -// Define a partial accessor method that returns the named Sancov table region when -// -// 1. 
Both the `$start` and `$stop` delimiter symbols are present -// 2. The delimited region is non-empty -// -// Sancov `$start` delimiters are usually declared as 8 byte values to ensure that they predictably -// anchor the delimited table during linking. If `$pad` is true, adjust for this so that the `start` -// offset in the returned `SancovTable` denotes the actual offset of the first table entry. -macro_rules! define_table_getter { - ( - name = $name: ident, - start = $start: ident, - stop = $stop: ident, - ty = $ty: expr, - ) => { - pub fn $name(&self, pad: bool) -> Option { - let offset = if pad { - self.$start?.checked_add(DELIMITER_START_PADDING)? - } else { - self.$start? - }; - - let size = self.$stop?.checked_sub(offset)?.try_into().ok()?; - - // The delimiters may be present even when the table is unused. We can detect this case - // by an empty delimited region. - if size == 0 { - return None; - } - - let ty = $ty; - Some(SancovTable { ty, offset, size }) - } - }; - // Accept trailing comma. - ( - name = $name: ident, - start = $start: ident, - stop = $stop: ident, - ty = $ty: expr, - ) => { - define_table_getter!(name = $name, start = $start, stop = $stop, ty = $ty,); - }; -} - -impl SancovDelimiters { - /// Return the most compiler-specific Sancov inline counter or bool flag table, if any. - pub fn inline_table(&self, pad: bool) -> Option { - // With MSVC, the LLVM delimiters are typically linked in alongside the - // MSVC-specific symbols. Check for MSVC-delimited tables first, though - // our validation of table size _should_ make this unnecessary. - - if let Some(table) = self.msvc_bools_table(pad) { - return Some(table); - } - - if let Some(table) = self.msvc_counters_table(pad) { - return Some(table); - } - - if let Some(table) = self.msvc_preview_counters_table(pad) { - return Some(table); - } - - // No MSVC tables found. Check for LLVM-emitted tables. 
- - if let Some(table) = self.llvm_bools_table(pad) { - return Some(table); - } - - if let Some(table) = self.llvm_counters_table(pad) { - return Some(table); - } - - None - } - - /// Return the most compiler-specific PC table, if any. - pub fn pcs_table(&self, pad: bool) -> Option { - // Check for MSVC tables first. - if let Some(table) = self.msvc_pcs_table(pad) { - return Some(table); - } - - if let Some(table) = self.llvm_pcs_table(pad) { - return Some(table); - } - - None - } - - define_table_getter!( - name = llvm_bools_table, - start = llvm_bools_start, - stop = llvm_bools_stop, - ty = SancovTableTy::Bools, - ); - - define_table_getter!( - name = llvm_counters_table, - start = llvm_counters_start, - stop = llvm_counters_stop, - ty = SancovTableTy::Counters, - ); - - define_table_getter!( - name = llvm_pcs_table, - start = llvm_pcs_start, - stop = llvm_pcs_stop, - ty = SancovTableTy::Pcs, - ); - - define_table_getter!( - name = msvc_bools_table, - start = msvc_bools_start, - stop = msvc_bools_stop, - ty = SancovTableTy::Bools, - ); - - define_table_getter!( - name = msvc_counters_table, - start = msvc_counters_start, - stop = msvc_counters_stop, - ty = SancovTableTy::Counters, - ); - - define_table_getter!( - name = msvc_pcs_table, - start = msvc_pcs_start, - stop = msvc_pcs_stop, - ty = SancovTableTy::Pcs, - ); - - define_table_getter!( - name = msvc_preview_counters_table, - start = msvc_preview_counters_start, - stop = msvc_preview_counters_stop, - ty = SancovTableTy::Counters, - ); - - pub fn insert(&mut self, delimiter: Delimiter, offset: u32) { - let offset = Some(offset); - - match delimiter { - Delimiter::LlvmBoolsStart => { - self.llvm_bools_start = offset; - } - Delimiter::LlvmBoolsStop => { - self.llvm_bools_stop = offset; - } - Delimiter::LlvmCountersStart => { - self.llvm_counters_start = offset; - } - Delimiter::LlvmCountersStop => { - self.llvm_counters_stop = offset; - } - Delimiter::LlvmPcsStart => { - self.llvm_pcs_start = offset; - } - 
Delimiter::LlvmPcsStop => { - self.llvm_pcs_stop = offset; - } - Delimiter::MsvcBoolsStart => { - self.msvc_bools_start = offset; - } - Delimiter::MsvcBoolsStop => { - self.msvc_bools_stop = offset; - } - Delimiter::MsvcCountersStart => { - self.msvc_counters_start = offset; - } - Delimiter::MsvcCountersStop => { - self.msvc_counters_stop = offset; - } - Delimiter::MsvcPcsStart => { - self.msvc_pcs_start = offset; - } - Delimiter::MsvcPcsStop => { - self.msvc_pcs_stop = offset; - } - Delimiter::MsvcPreviewCountersStart => { - self.msvc_preview_counters_start = offset; - } - Delimiter::MsvcPreviewCountersStop => { - self.msvc_preview_counters_stop = offset; - } - } - } -} - -/// A table of Sancov instrumentation data. -/// -/// It is an array of either bytes or (packed pairs of) pointer-sized integers. -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub struct SancovTable { - pub ty: SancovTableTy, - - /// Module-relative offset of the first array element. - pub offset: u32, - - /// Size of the array region (in bytes). - /// - /// For `u8`-sized elements, this is also the length, but for PC tables, - /// this will be the product of the length and entry count, where each - /// entry is defined in LLVM as: - /// - /// ```c - /// struct PCTableEntry { - /// uintptr_t PC, PCFlags; - /// }; - /// ``` - pub size: usize, -} - -impl SancovTable { - pub fn range(&self) -> std::ops::Range { - self.offset..(self.offset + (self.size as u32)) - } -} - -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub enum SancovTableTy { - Bools, - Counters, - Pcs, -} - -/// Note: on Windows, the LLVM `__start_` delimiter symbols do not denote the -/// first entry of a Sancov table array, but an anchor offset that precedes it -/// by 8 bytes. 
-/// -/// See: -/// - `compiler-rt/lib/sanitizer_common/sanitizer_coverage_win_sections.cpp` -/// - `ModuleSanitizerCoverage::CreateSecStartEnd()` in -/// `llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp:350-351` -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub enum Delimiter { - LlvmBoolsStart, - LlvmBoolsStop, - LlvmCountersStart, - LlvmCountersStop, - LlvmPcsStart, - LlvmPcsStop, - MsvcBoolsStart, - MsvcBoolsStop, - MsvcCountersStart, - MsvcCountersStop, - MsvcPcsStart, - MsvcPcsStop, - MsvcPreviewCountersStart, - MsvcPreviewCountersStop, -} - -impl std::str::FromStr for Delimiter { - type Err = anyhow::Error; - - fn from_str(s: &str) -> Result { - let delimiter = match s { - "__start___sancov_cntrs" => Self::LlvmBoolsStart, - "__stop___sancov_cntrs" => Self::LlvmBoolsStop, - "__start___sancov_bools" => Self::LlvmCountersStart, - "__stop___sancov_bools" => Self::LlvmCountersStop, - "__start___sancov_pcs" => Self::LlvmPcsStart, - "__stop___sancov_pcs" => Self::LlvmPcsStop, - "__sancov$BoolFlagStart" => Self::MsvcBoolsStart, - "__sancov$BoolFlagEnd" => Self::MsvcBoolsStop, - "__sancov$8bitCountersStart" => Self::MsvcCountersStart, - "__sancov$8bitCountersEnd" => Self::MsvcCountersStop, - "__sancov$PCTableStart" => Self::MsvcPcsStart, - "__sancov$PCTableEnd" => Self::MsvcPcsStop, - "SancovBitmapStart" => Self::MsvcPreviewCountersStart, - "SancovBitmapEnd" => Self::MsvcPreviewCountersStop, - _ => { - anyhow::bail!("string does not match any Sancov delimiter symbol"); - } - }; - - Ok(delimiter) - } -} - -#[derive(Clone, Debug)] -pub struct SancovInlineAccessScanner { - pub base: u64, - pub offsets: BTreeSet, - table: SancovTable, -} - -impl SancovInlineAccessScanner { - pub fn new(base: u64, table: SancovTable) -> Self { - let offsets = BTreeSet::default(); - - Self { - base, - offsets, - table, - } - } - - pub fn scan(&mut self, data: &[u8], va: u64) -> Result<()> { - let mut decoder = Decoder::new(64, data, DecoderOptions::NONE); - - decoder.set_ip(va); 
- - let mut inst = Instruction::default(); - - while decoder.can_decode() { - decoder.decode_out(&mut inst); - - // If no memory operand, there is no table access. - if !inst.op_kinds().any(|o| o == OpKind::Memory) { - continue; - } - - // Skip any memory access that is not PC-relative or absolute. - if !inst.is_ip_rel_memory_operand() { - if inst.memory_base() != Register::None { - continue; - } - - if inst.memory_index() != Register::None { - continue; - } - - if inst.segment_prefix() != Register::None { - continue; - } - } - - match inst.op_code().mnemonic() { - Mnemonic::Add | Mnemonic::Inc => { - // These may be 8-bit counter updates, check further. - } - Mnemonic::Mov => { - // This may be a bool flag set or the start of an unoptimized - // 8-bit counter update sequence. - // - // mov al, [rel ] - // - // or: - // - // mov [rel
], 1 - match (inst.op0_kind(), inst.op1_kind()) { - (OpKind::Register, OpKind::Memory) => { - // Possible start of an unoptimized 8-bit counter update sequence, like: - // - // mov al, [rel
] - // add al, 1 - // mov [rel
], al - // - // Check the operand sizes. - - if inst.memory_size().size() != 1 { - // Load would span multiple table entries, skip. - continue; - } - - if inst.op0_register().size() != 1 { - // Should be unreachable after a 1-byte load. - continue; - } - } - (OpKind::Memory, OpKind::Immediate8) => { - // Possible bool flag set, like: - // - // mov [rel
], 1 - // - // Check store size and immediate value. - - if inst.memory_size().size() != 1 { - // Store would span multiple table entries, skip. - continue; - } - - if inst.immediate8() != 1 { - // Not a bool flag set, skip. - continue; - } - } - _ => { - // Not a known update pattern, skip. - continue; - } - } - } - _ => { - // Does not correspond to any known counter update, so skip. - continue; - } - } - - // Even when PC-relative, `memory_displacement64()` returns a VA. - let accessed = inst - .memory_displacement64() - .checked_sub(self.base) - .ok_or_else(|| format_err!("underflow converting access VA to offset"))? - .try_into()?; - - if self.table.range().contains(&accessed) { - let offset = inst - .ip() - .checked_sub(self.base) - .ok_or_else(|| format_err!("underflow computing module offset"))? - .try_into()?; - - self.offsets.insert(offset); - } - } - - Ok(()) - } -} diff --git a/src/agent/coverage-legacy/src/source.rs b/src/agent/coverage-legacy/src/source.rs deleted file mode 100644 index 308ee6353b..0000000000 --- a/src/agent/coverage-legacy/src/source.rs +++ /dev/null @@ -1,445 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. - -use anyhow::Result; -use serde::{Deserialize, Serialize}; - -#[derive(Clone, Debug, Default, Deserialize, PartialEq, Eq, Serialize)] -#[serde(transparent)] -pub struct SourceCoverage { - pub files: Vec, -} - -#[derive(Clone, Debug, Default, Deserialize, PartialEq, Eq, Serialize)] -pub struct SourceFileCoverage { - /// UTF-8 encoding of the path to the source file. - pub file: String, - - pub locations: Vec, -} - -#[derive(Clone, Debug, Default, Deserialize, PartialEq, Eq, Serialize)] -pub struct SourceCoverageLocation { - /// Line number of entry in `file` (1-indexed). - pub line: u32, - - /// Optional column offset (0-indexed). - /// - /// When column offsets are present, they should be interpreted as the start - /// of a span bounded by the next in-line column offset (or end-of-line). 
- pub column: Option, - - /// Execution count at location. - pub count: u32, -} - -impl SourceCoverageLocation { - pub fn new(line: u32, column: impl Into>, count: u32) -> Result { - if line == 0 { - anyhow::bail!("source lines must be 1-indexed"); - } - - let column = column.into(); - - Ok(Self { - line, - column, - count, - }) - } -} - -#[cfg(test)] -mod tests { - use anyhow::Result; - use serde_json::json; - - use super::*; - - const MAIN_C: &str = "src/bin/main.c"; - const COMMON_C: &str = "src/lib/common.c"; - - #[test] - fn test_source_coverage_location() -> Result<()> { - let valid = SourceCoverageLocation::new(5, 4, 1)?; - assert_eq!( - valid, - SourceCoverageLocation { - line: 5, - column: Some(4), - count: 1, - } - ); - - let valid_no_col = SourceCoverageLocation::new(5, None, 1)?; - assert_eq!( - valid_no_col, - SourceCoverageLocation { - line: 5, - column: None, - count: 1, - } - ); - - let invalid = SourceCoverageLocation::new(0, 4, 1); - assert!(invalid.is_err()); - - Ok(()) - } - - #[test] - fn test_source_coverage_full() -> Result<()> { - let text = serde_json::to_string(&json!([ - { - "file": MAIN_C.to_owned(), - "locations": [ - { "line": 4, "column": 4, "count": 1 }, - { "line": 9, "column": 4, "count": 0 }, - { "line": 12, "column": 4, "count": 1 }, - ], - }, - { - "file": COMMON_C.to_owned(), - "locations": [ - { "line": 5, "column": 4, "count": 0 }, - { "line": 5, "column": 9, "count": 1 }, - { "line": 8, "column": 0, "count": 0 }, - ], - }, - ]))?; - - let coverage = { - let files = vec![ - SourceFileCoverage { - file: MAIN_C.to_owned(), - locations: vec![ - SourceCoverageLocation { - line: 4, - column: Some(4), - count: 1, - }, - SourceCoverageLocation { - line: 9, - column: Some(4), - count: 0, - }, - SourceCoverageLocation { - line: 12, - column: Some(4), - count: 1, - }, - ], - }, - SourceFileCoverage { - file: COMMON_C.to_owned(), - locations: vec![ - SourceCoverageLocation { - line: 5, - column: Some(4), - count: 0, - }, - 
SourceCoverageLocation { - line: 5, - column: Some(9), - count: 1, - }, - SourceCoverageLocation { - line: 8, - column: Some(0), - count: 0, - }, - ], - }, - ]; - SourceCoverage { files } - }; - - let ser = serde_json::to_string(&coverage)?; - assert_eq!(ser, text); - - let de: SourceCoverage = serde_json::from_str(&text)?; - assert_eq!(de, coverage); - - Ok(()) - } - - #[test] - fn test_source_coverage_no_files() -> Result<()> { - let text = serde_json::to_string(&json!([]))?; - - let coverage = SourceCoverage { files: vec![] }; - - let ser = serde_json::to_string(&coverage)?; - assert_eq!(ser, text); - - let de: SourceCoverage = serde_json::from_str(&text)?; - assert_eq!(de, coverage); - - Ok(()) - } - - #[test] - fn test_source_coverage_no_locations() -> Result<()> { - let text = serde_json::to_string(&json!([ - { - "file": MAIN_C.to_owned(), - "locations": [], - }, - { - "file": COMMON_C.to_owned(), - "locations": [], - }, - ]))?; - - let coverage = { - let files = vec![ - SourceFileCoverage { - file: MAIN_C.to_owned(), - locations: vec![], - }, - SourceFileCoverage { - file: COMMON_C.to_owned(), - locations: vec![], - }, - ]; - SourceCoverage { files } - }; - - let ser = serde_json::to_string(&coverage)?; - assert_eq!(ser, text); - - let de: SourceCoverage = serde_json::from_str(&text)?; - assert_eq!(de, coverage); - - Ok(()) - } - - #[test] - fn test_source_coverage_no_or_null_columns() -> Result<()> { - let text_null_cols = serde_json::to_string(&json!([ - { - "file": MAIN_C.to_owned(), - "locations": [ - { "line": 4, "column": null, "count": 1 }, - { "line": 9, "column": null, "count": 0 }, - { "line": 12, "column": null, "count": 1 }, - ], - }, - { - "file": COMMON_C.to_owned(), - "locations": [ - { "line": 5, "column": null, "count": 0 }, - { "line": 5, "column": null, "count": 1 }, - { "line": 8, "column": null, "count": 0 }, - ], - }, - ]))?; - - let text_no_cols = serde_json::to_string(&json!([ - { - "file": MAIN_C.to_owned(), - "locations": [ - { 
"line": 4, "count": 1 }, - { "line": 9, "count": 0 }, - { "line": 12, "count": 1 }, - ], - }, - { - "file": COMMON_C.to_owned(), - "locations": [ - { "line": 5, "count": 0 }, - { "line": 5, "count": 1 }, - { "line": 8, "count": 0 }, - ], - }, - ]))?; - - let coverage = { - let files = vec![ - SourceFileCoverage { - file: MAIN_C.to_owned(), - locations: vec![ - SourceCoverageLocation { - line: 4, - column: None, - count: 1, - }, - SourceCoverageLocation { - line: 9, - column: None, - count: 0, - }, - SourceCoverageLocation { - line: 12, - column: None, - count: 1, - }, - ], - }, - SourceFileCoverage { - file: COMMON_C.to_owned(), - locations: vec![ - SourceCoverageLocation { - line: 5, - column: None, - count: 0, - }, - SourceCoverageLocation { - line: 5, - column: None, - count: 1, - }, - SourceCoverageLocation { - line: 8, - column: None, - count: 0, - }, - ], - }, - ]; - SourceCoverage { files } - }; - - // Serialized with present `column` keys, `null` values. - let ser = serde_json::to_string(&coverage)?; - assert_eq!(ser, text_null_cols); - - // Deserializes when `column` keys are absent. - let de_no_cols: SourceCoverage = serde_json::from_str(&text_no_cols)?; - assert_eq!(de_no_cols, coverage); - - // Deserializes when `column` keys are present but `null`. 
- let de_null_cols: SourceCoverage = serde_json::from_str(&text_null_cols)?; - assert_eq!(de_null_cols, coverage); - - Ok(()) - } - - #[test] - fn test_source_coverage_partial_columns() -> Result<()> { - let text = serde_json::to_string(&json!([ - { - "file": MAIN_C.to_owned(), - "locations": [ - { "line": 4, "column": 4, "count": 1 }, - { "line": 9, "column": 4, "count": 0 }, - { "line": 12, "column": 4, "count": 1 }, - ], - }, - { - "file": COMMON_C.to_owned(), - "locations": [ - { "line": 5, "column": null, "count": 0 }, - { "line": 5, "column": null, "count": 1 }, - { "line": 8, "column": null, "count": 0 }, - ], - }, - ]))?; - - let coverage = { - let files = vec![ - SourceFileCoverage { - file: MAIN_C.to_owned(), - locations: vec![ - SourceCoverageLocation { - line: 4, - column: Some(4), - count: 1, - }, - SourceCoverageLocation { - line: 9, - column: Some(4), - count: 0, - }, - SourceCoverageLocation { - line: 12, - column: Some(4), - count: 1, - }, - ], - }, - SourceFileCoverage { - file: COMMON_C.to_owned(), - locations: vec![ - SourceCoverageLocation { - line: 5, - column: None, - count: 0, - }, - SourceCoverageLocation { - line: 5, - column: None, - count: 1, - }, - SourceCoverageLocation { - line: 8, - column: None, - count: 0, - }, - ], - }, - ]; - SourceCoverage { files } - }; - - let ser = serde_json::to_string(&coverage)?; - assert_eq!(ser, text); - - let de: SourceCoverage = serde_json::from_str(&text)?; - assert_eq!(de, coverage); - - Ok(()) - } - - #[test] - fn test_source_coverage_mixed_columns() -> Result<()> { - let text = serde_json::to_string(&json!([ - { - "file": MAIN_C.to_owned(), - "locations": [ - { "line": 4, "column": null, "count": 1 }, - { "line": 9, "column": 4, "count": 0 }, - { "line": 12, "column": null, "count": 1 }, - { "line": 13, "column": 7, "count": 0 }, - ], - }, - ]))?; - - let coverage = { - let files = vec![SourceFileCoverage { - file: MAIN_C.to_owned(), - locations: vec![ - SourceCoverageLocation { - line: 4, - 
column: None, - count: 1, - }, - SourceCoverageLocation { - line: 9, - column: Some(4), - count: 0, - }, - SourceCoverageLocation { - line: 12, - column: None, - count: 1, - }, - SourceCoverageLocation { - line: 13, - column: Some(7), - count: 0, - }, - ], - }]; - SourceCoverage { files } - }; - - let ser = serde_json::to_string(&coverage)?; - assert_eq!(ser, text); - - let de: SourceCoverage = serde_json::from_str(&text)?; - assert_eq!(de, coverage); - - Ok(()) - } -} diff --git a/src/agent/coverage-legacy/src/test.rs b/src/agent/coverage-legacy/src/test.rs deleted file mode 100644 index dc90209d75..0000000000 --- a/src/agent/coverage-legacy/src/test.rs +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. - -use anyhow::Result; - -use crate::code::ModulePath; - -/// Given a POSIX-style path as a string, construct a valid absolute path for -/// the target OS and return it as a checked `ModulePath`. -pub fn module_path(posix_path: &str) -> Result { - let mut p = std::path::PathBuf::default(); - - // Ensure that the new path is absolute. - if cfg!(target_os = "windows") { - p.push("c:\\"); - } else { - p.push("/"); - } - - // Remove any affixed POSIX path separators, then split on any internal - // separators and add each component to our accumulator path in an - // OS-specific way. 
- for c in posix_path.trim_matches('/').split('/') { - p.push(c); - } - - ModulePath::new(p) -} diff --git a/src/agent/coverage/Cargo.toml b/src/agent/coverage/Cargo.toml index 27f2fe61c5..48143f52a3 100644 --- a/src/agent/coverage/Cargo.toml +++ b/src/agent/coverage/Cargo.toml @@ -5,7 +5,7 @@ edition = "2021" license = "MIT" [dependencies] -anyhow = "1.0" +anyhow = { version = "1.0", features = ["backtrace"] } cobertura = { path = "../cobertura" } debuggable-module = { path = "../debuggable-module" } iced-x86 = "1.17" diff --git a/src/agent/onefuzz-task/Cargo.toml b/src/agent/onefuzz-task/Cargo.toml index 5e91817ed6..d4f788e132 100644 --- a/src/agent/onefuzz-task/Cargo.toml +++ b/src/agent/onefuzz-task/Cargo.toml @@ -16,7 +16,8 @@ async-trait = "0.1" atexit = { path = "../atexit" } backoff = { version = "0.4", features = ["tokio"] } clap = "2.34" -coverage = { package = "coverage-legacy", path = "../coverage-legacy" } +cobertura = { path = "../cobertura" } +coverage = { path = "../coverage" } crossterm = "0.22" env_logger = "0.9" flume = "0.10" @@ -25,6 +26,7 @@ hex = "0.4" lazy_static = "1.4" log = "0.4" num_cpus = "1.15" +onefuzz-file-format = { path = "../onefuzz-file-format" } regex = "1.6.0" reqwest = { version = "0.11", features = [ "json", diff --git a/src/agent/onefuzz-task/src/local/common.rs b/src/agent/onefuzz-task/src/local/common.rs index 54282315f3..baf8fc73d5 100644 --- a/src/agent/onefuzz-task/src/local/common.rs +++ b/src/agent/onefuzz-task/src/local/common.rs @@ -40,9 +40,6 @@ pub const CHECK_FUZZER_HELP: &str = "check_fuzzer_help"; pub const DISABLE_CHECK_DEBUGGER: &str = "disable_check_debugger"; pub const REGRESSION_REPORTS_DIR: &str = "regression_reports_dir"; -#[cfg(any(target_os = "linux", target_os = "windows"))] -pub const COVERAGE_FILTER: &str = "coverage_filter"; - pub const TARGET_EXE: &str = "target_exe"; pub const TARGET_ENV: &str = "target_env"; pub const TARGET_OPTIONS: &str = "target_options"; diff --git 
a/src/agent/onefuzz-task/src/local/coverage.rs b/src/agent/onefuzz-task/src/local/coverage.rs index b076691cb5..6644a56412 100644 --- a/src/agent/onefuzz-task/src/local/coverage.rs +++ b/src/agent/onefuzz-task/src/local/coverage.rs @@ -4,8 +4,8 @@ use crate::{ local::common::{ build_local_context, get_cmd_arg, get_cmd_env, get_cmd_exe, get_synced_dir, - get_synced_dirs, CmdType, CHECK_FUZZER_HELP, COVERAGE_DIR, COVERAGE_FILTER, INPUTS_DIR, - READONLY_INPUTS, TARGET_ENV, TARGET_EXE, TARGET_OPTIONS, TARGET_TIMEOUT, + get_synced_dirs, CmdType, CHECK_FUZZER_HELP, COVERAGE_DIR, INPUTS_DIR, READONLY_INPUTS, + TARGET_ENV, TARGET_EXE, TARGET_OPTIONS, TARGET_TIMEOUT, }, tasks::{ config::CommonConfig, @@ -30,7 +30,6 @@ pub fn build_coverage_config( let target_env = get_cmd_env(CmdType::Target, args)?; let mut target_options = get_cmd_arg(CmdType::Target, args); let target_timeout = value_t!(args, TARGET_TIMEOUT, u64).ok(); - let coverage_filter = value_t!(args, TARGET_TIMEOUT, String).ok(); let readonly_inputs = if local_job { vec![ @@ -56,7 +55,10 @@ pub fn build_coverage_config( target_env, target_options, target_timeout, - coverage_filter, + coverage_filter: None, + function_allowlist: None, + module_allowlist: None, + source_allowlist: None, input_queue, readonly_inputs, coverage, @@ -99,9 +101,6 @@ pub fn build_shared_args(local_job: bool) -> Vec> { Arg::with_name(TARGET_TIMEOUT) .takes_value(true) .long(TARGET_TIMEOUT), - Arg::with_name(COVERAGE_FILTER) - .takes_value(true) - .long(COVERAGE_FILTER), Arg::with_name(COVERAGE_DIR) .takes_value(true) .required(!local_job) diff --git a/src/agent/onefuzz-task/src/tasks/coverage/generic.rs b/src/agent/onefuzz-task/src/tasks/coverage/generic.rs index 554623e012..84b9436c5c 100644 --- a/src/agent/onefuzz-task/src/tasks/coverage/generic.rs +++ b/src/agent/onefuzz-task/src/tasks/coverage/generic.rs @@ -2,22 +2,26 @@ // Licensed under the MIT License. 
use std::collections::HashMap; +use std::convert::TryFrom; use std::path::{Path, PathBuf}; use std::process::{Command, Stdio}; -use std::sync::{Arc, Mutex}; use std::time::Duration; -use anyhow::{Context, Result}; +use anyhow::{bail, Context, Result}; use async_trait::async_trait; -use coverage::block::CommandBlockCov; -use coverage::cache::ModuleCache; -use coverage::cobertura::cobertura; -use coverage::code::{CmdFilter, CmdFilterDef}; -use coverage::debuginfo::DebugInfo; +use cobertura::CoberturaCoverage; +use coverage::allowlist::{AllowList, TargetAllowList}; +use coverage::binary::BinaryCoverage; +use coverage::record::CoverageRecorder; +use coverage::source::{binary_to_source_coverage, SourceCoverage}; +use onefuzz::env::LD_LIBRARY_PATH; use onefuzz::expand::{Expand, PlaceHolder}; use onefuzz::syncdir::SyncedDir; +use onefuzz_file_format::coverage::{ + binary::{v1::BinaryCoverageJson as BinaryCoverageJsonV1, BinaryCoverageJson}, + source::{v1::SourceCoverageJson as SourceCoverageJsonV1, SourceCoverageJson}, +}; use onefuzz_telemetry::{warn, Event::coverage_data, EventData}; -use serde::de::DeserializeOwned; use storage_queue::{Message, QueueClient}; use tokio::fs; use tokio::task::spawn_blocking; @@ -27,17 +31,18 @@ use url::Url; use crate::tasks::config::CommonConfig; use crate::tasks::generic::input_poller::{CallbackImpl, InputPoller, Processor}; use crate::tasks::heartbeat::{HeartbeatSender, TaskHeartbeatClient}; -use crate::tasks::utils::{resolve_setup_relative_path, try_resolve_setup_relative_path}; +use crate::tasks::utils::try_resolve_setup_relative_path; use super::COBERTURA_COVERAGE_FILE; const MAX_COVERAGE_RECORDING_ATTEMPTS: usize = 2; const COVERAGE_FILE: &str = "coverage.json"; const SOURCE_COVERAGE_FILE: &str = "source-coverage.json"; -const MODULE_CACHE_FILE: &str = "module-cache.json"; const DEFAULT_TARGET_TIMEOUT: Duration = Duration::from_secs(120); +const WINDOWS_INTERCEPTOR_DENYLIST: &str = include_str!("generic/windows-interceptor.list"); 
+ #[derive(Debug, Deserialize)] pub struct Config { pub target_exe: PathBuf, @@ -45,8 +50,15 @@ pub struct Config { pub target_options: Vec, pub target_timeout: Option, + // Deprecated. + // + // Retained only to informatively fail tasks that were queued pre-upgrade. pub coverage_filter: Option, + pub function_allowlist: Option, + pub module_allowlist: Option, + pub source_allowlist: Option, + pub input_queue: Option, pub readonly_inputs: Vec, pub coverage: SyncedDir, @@ -77,16 +89,26 @@ impl CoverageTask { pub async fn run(&mut self) -> Result<()> { info!("starting coverage task"); - self.config.coverage.init_pull().await?; + if self.config.coverage_filter.is_some() { + bail!("the `coverage_filter` option for the `coverage` task is deprecated"); + } - let cache = deserialize_or_default(MODULE_CACHE_FILE).await?; + self.config.coverage.init_pull().await?; let coverage_file = self.config.coverage.local_path.join(COVERAGE_FILE); - let coverage = deserialize_or_default(coverage_file).await?; - let filter = self.load_filter().await?; + let coverage = { + if let Ok(text) = fs::read_to_string(&coverage_file).await { + let json = BinaryCoverageJson::deserialize(&text)?; + BinaryCoverage::try_from(json)? 
+ } else { + BinaryCoverage::default() + } + }; + + let allowlist = self.load_target_allowlist().await?; let heartbeat = self.config.common.init_heartbeat(None).await?; - let mut context = TaskContext::new(cache, &self.config, coverage, filter, heartbeat); + let mut context = TaskContext::new(&self.config, coverage, allowlist, heartbeat); if !context.uses_input() { bail!("input is not specified on the command line or arguments for the target"); @@ -132,78 +154,61 @@ impl CoverageTask { Ok(()) } - async fn load_filter(&self) -> Result { - let raw_filter_path = if let Some(raw_path) = &self.config.coverage_filter { - raw_path - } else { - return Ok(CmdFilter::default()); - }; - - let resolved = - resolve_setup_relative_path(&self.config.common.setup_dir, raw_filter_path).await?; - let filter_path = if let Some(path) = resolved { - path - } else { - error!( - "unable to resolve setup-relative coverage filter path: {}", - raw_filter_path - ); - return Ok(CmdFilter::default()); - }; + async fn load_target_allowlist(&self) -> Result { + // By default, all items are allowed. + // + // We will check for user allowlists for each item type. On Windows, we must ensure some + // source files are excluded. 
+ let mut allowlist = TargetAllowList::default(); - let data = fs::read(&filter_path).await?; - let def: CmdFilterDef = serde_json::from_slice(&data)?; - let filter = CmdFilter::new(def)?; - - Ok(filter) - } -} + if let Some(functions) = &self.config.function_allowlist { + allowlist.functions = self.load_allowlist(functions).await?; + } -async fn deserialize_or_default(path: impl AsRef) -> Result -where - T: Default + DeserializeOwned, -{ - use tokio::io::ErrorKind::NotFound; + if let Some(modules) = &self.config.module_allowlist { + allowlist.modules = self.load_allowlist(modules).await?; + } - let data = fs::read(path).await; + if let Some(source_files) = &self.config.source_allowlist { + allowlist.source_files = self.load_allowlist(source_files).await?; + } - if let Err(err) = &data { - if err.kind() == NotFound { - return Ok(T::default()); + if cfg!(target_os = "windows") { + // If on Windows, add a base denylist which excludes sanitizer-intercepted CRT and + // process startup functions. Setting software breakpoints in these functions breaks + // interceptor init, and causes test case execution to diverge. + let interceptor_denylist = AllowList::parse(WINDOWS_INTERCEPTOR_DENYLIST)?; + allowlist.source_files.extend(&interceptor_denylist); } - } - let data = data?; + Ok(allowlist) + } - Ok(serde_json::from_slice(&data)?) 
+ async fn load_allowlist(&self, path: &str) -> Result { + let resolved = try_resolve_setup_relative_path(&self.config.common.setup_dir, path).await?; + let text = fs::read_to_string(&resolved).await?; + AllowList::parse(&text) + } } struct TaskContext<'a> { - cache: Arc>, config: &'a Config, - coverage: CommandBlockCov, - debuginfo: Mutex, - filter: CmdFilter, + coverage: BinaryCoverage, + allowlist: TargetAllowList, heartbeat: Option, } impl<'a> TaskContext<'a> { pub fn new( - cache: ModuleCache, config: &'a Config, - coverage: CommandBlockCov, - filter: CmdFilter, + coverage: BinaryCoverage, + allowlist: TargetAllowList, heartbeat: Option, ) -> Self { - let cache = Arc::new(Mutex::new(cache)); - let debuginfo = Mutex::new(DebugInfo::default()); - Self { - cache, config, coverage, - debuginfo, - filter, + allowlist, heartbeat, } } @@ -245,25 +250,30 @@ impl<'a> TaskContext<'a> { async fn try_record_input(&mut self, input: &Path) -> Result<()> { let coverage = self.record_impl(input).await?; - self.coverage.merge_max(&coverage); + self.coverage.merge(&coverage); Ok(()) } - async fn record_impl(&mut self, input: &Path) -> Result { - let cache = Arc::clone(&self.cache); - let filter = self.filter.clone(); + async fn record_impl(&mut self, input: &Path) -> Result { + let allowlist = self.allowlist.clone(); let cmd = self.command_for_input(input).await?; let timeout = self.config.timeout(); - let coverage = spawn_blocking(move || { - let mut cache = cache - .lock() - .map_err(|_| format_err!("module cache mutex lock was poisoned"))?; - record_os_impl(cmd, timeout, &mut cache, filter) + let recorded = spawn_blocking(move || { + CoverageRecorder::new(cmd) + .allowlist(allowlist) + .timeout(timeout) + .record() }) .await??; - Ok(coverage) + if let Some(status) = recorded.output.status { + if !status.success() { + bail!("coverage recording failed, child status = {}", status); + } + } + + Ok(recorded.coverage) } fn uses_input(&self) -> bool { @@ -307,6 +317,43 @@ impl<'a> 
TaskContext<'a> { cmd.env(k, expand.evaluate_value(v)?); } + // Make shared library resolution on Linux match behavior in other tasks. + if cfg!(target_os = "linux") { + let cmd_ld_library_path = cmd + .get_envs() + .find(|(k, _)| *k == LD_LIBRARY_PATH) + .map(|(_, v)| v); + + // Depending on user-provided values, obtain a base value for `LD_LIBRARY_PATH`, which + // we will update to include the local root of the setup directory. + let ld_library_path = match cmd_ld_library_path { + None => { + // The user did not provide an `LD_LIBRARY_PATH`, so the child process will + // inherit the current actual value (if any). It would be best to never inherit + // the current environment in any user subprocess invocation, but since we do, + // preserve the existing behavior. + std::env::var_os(LD_LIBRARY_PATH).unwrap_or_default() + } + Some(None) => { + // This is actually unreachable, since it can only occur as the result of a call + // to `env_remove(LD_LIBRARY_PATH)`. Even if this could happen, we'd reset it to + // the setup dir, so use the empty path as our base. + "".into() + } + Some(Some(path)) => { + // `LD_LIBRARY_PATH` was set by the user-provided `target_env`, and we may have + // expanded some placeholder variables. Extend that. + path.to_owned() + } + }; + + // Add the setup directory to the library path and ensure it will occur in the child + // environment. + let ld_library_path = + onefuzz::env::update_path(ld_library_path, &self.config.common.setup_dir)?; + cmd.env(LD_LIBRARY_PATH, ld_library_path); + } + cmd.env_remove("RUST_LOG"); cmd.stdin(Stdio::null()); cmd.stdout(Stdio::piped()); @@ -359,32 +406,33 @@ impl<'a> TaskContext<'a> { } pub async fn save_and_sync_coverage(&self) -> Result<()> { + // JSON binary coverage. 
+ let binary = self.coverage.clone(); + let json = BinaryCoverageJson::V1(BinaryCoverageJsonV1::from(binary)); + let text = serde_json::to_string(&json).context("serializing binary coverage")?; let path = self.config.coverage.local_path.join(COVERAGE_FILE); - let text = serde_json::to_string(&self.coverage).context("serializing block coverage")?; fs::write(&path, &text) .await .with_context(|| format!("writing coverage to {}", path.display()))?; + // JSON source coverage. + let source = self.source_coverage().await?; + let json = SourceCoverageJson::V1(SourceCoverageJsonV1::from(source.clone())); + let text = serde_json::to_string(&json).context("serializing source coverage")?; let path = self.config.coverage.local_path.join(SOURCE_COVERAGE_FILE); - let src_coverage = { - let mut debuginfo = self - .debuginfo - .lock() - .map_err(|e| anyhow::format_err!("{}", e))?; - self.coverage.source_coverage(&mut debuginfo)? - }; - let text = serde_json::to_string(&src_coverage).context("serializing source coverage")?; fs::write(&path, &text) .await .with_context(|| format!("writing source coverage to {}", path.display()))?; + // Cobertura XML source coverage. + let cobertura = CoberturaCoverage::from(source.clone()); + let text = cobertura.to_string()?; let path = self .config .coverage .local_path .join(COBERTURA_COVERAGE_FILE); - let cobertura_source_coverage = cobertura(src_coverage)?; - fs::write(&path, &cobertura_source_coverage) + fs::write(&path, &text) .await .with_context(|| format!("writing cobertura source coverage to {}", path.display()))?; @@ -392,37 +440,14 @@ impl<'a> TaskContext<'a> { Ok(()) } -} -#[cfg(target_os = "linux")] -fn record_os_impl( - cmd: Command, - timeout: Duration, - cache: &mut ModuleCache, - filter: CmdFilter, -) -> Result { - use coverage::block::linux::Recorder; + async fn source_coverage(&self) -> Result { + // Must be owned due to `spawn_blocking()` lifetimes. 
+ let binary = self.coverage.clone(); - let coverage = Recorder::record(cmd, timeout, cache, filter)?; - - Ok(coverage) -} - -#[cfg(target_os = "windows")] -fn record_os_impl( - cmd: Command, - timeout: Duration, - cache: &mut ModuleCache, - filter: CmdFilter, -) -> Result { - use coverage::block::windows::{Recorder, RecorderEventHandler}; - - let mut recorder = Recorder::new(cache, filter); - let mut handler = RecorderEventHandler::new(&mut recorder, timeout); - handler.run(cmd)?; - let coverage = recorder.into_coverage(); - - Ok(coverage) + // Conversion to source coverage heavy on blocking I/O. + spawn_blocking(move || binary_to_source_coverage(&binary)).await? + } } #[async_trait] @@ -446,14 +471,14 @@ struct CoverageStats { } impl CoverageStats { - pub fn new(coverage: &CommandBlockCov) -> Self { + pub fn new(coverage: &BinaryCoverage) -> Self { let mut stats = CoverageStats::default(); - for (_, module) in coverage.iter() { - for block in module.blocks.values() { + for (_, module) in coverage.modules.iter() { + for count in module.offsets.values() { stats.features += 1; - if block.count > 0 { + if count.reached() { stats.covered += 1; } } diff --git a/src/agent/onefuzz-task/src/tasks/coverage/generic/windows-interceptor.list b/src/agent/onefuzz-task/src/tasks/coverage/generic/windows-interceptor.list new file mode 100644 index 0000000000..3669bbadec --- /dev/null +++ b/src/agent/onefuzz-task/src/tasks/coverage/generic/windows-interceptor.list @@ -0,0 +1,15 @@ +# Required to avoid recording errors. +! *\llvm-project\compiler-rt\* +! *\vctools\crt\* +! *\Windows Kits\10\Include\*\ucrt\* +! *\ExternalAPIs\Windows\10\sdk\* +! *\ExternalAPIs\UnifiedCRT\* +! minkernel\crts\* +! vccrt\vcruntime\* + +# Optional, reduces noise. +! *\Microsoft Visual Studio\*\VC\Tools\MSVC\*\include\* +! *\Windows Kits\10\include\*\um\* +! *\vctools\langapi\* +! onecore\internal\sdk\inc\minwin\* +! 
shared\inc\* diff --git a/src/ci/check-dependencies.sh b/src/ci/check-dependencies.sh index aa0cbf4d62..fe35f0b46f 100755 --- a/src/ci/check-dependencies.sh +++ b/src/ci/check-dependencies.sh @@ -1,9 +1,9 @@ #!/bin/bash # This script checks the OneFuzz agent binaries to ensure -# that we don't accidentally change their dependencies, and +# that we don't accidentally change their dependencies, and # create a binary that won't work on our standard Ubuntu images. -# +# # If we do make changes on purpose, the lists below should be updated. # See issue and related links: @@ -19,11 +19,11 @@ function get-deps { function check { wanted=$2 - if [ "$(uname)" != 'Linux' ]; then + if [ "$(uname)" != 'Linux' ]; then wanted=$3 fi got=$(get-deps "$1") - if ! difference=$(diff -u --color <(echo "$wanted") <(echo "$got")); then + if ! difference=$(diff -u --color <(echo "$wanted") <(echo "$got")); then echo "unexpected dependencies for $1" echo "wanted:" echo "$wanted" @@ -40,9 +40,11 @@ check "$script_dir/../agent/target/release/onefuzz-task" \ "/lib64/ld-linux-x86-64.so.2 libc.so.6 libdl.so.2 +libgcc_s.so.1 liblzma.so.5 libm.so.6 libpthread.so.0 +libstdc++.so.6 libunwind-ptrace.so.0 libunwind-x86_64.so.8 libunwind.so.8 diff --git a/src/cli/onefuzz/api.py b/src/cli/onefuzz/api.py index fe959391a0..8360777eb5 100644 --- a/src/cli/onefuzz/api.py +++ b/src/cli/onefuzz/api.py @@ -979,7 +979,9 @@ def create( colocate: bool = False, report_list: Optional[List[str]] = None, minimized_stack_depth: Optional[int] = None, - coverage_filter: Optional[str] = None, + function_allowlist: Optional[str] = None, + module_allowlist: Optional[str] = None, + source_allowlist: Optional[str] = None, ) -> models.Task: """ Create a task @@ -1055,7 +1057,9 @@ def create( report_list=report_list, preserve_existing_outputs=preserve_existing_outputs, minimized_stack_depth=minimized_stack_depth, - coverage_filter=coverage_filter, + function_allowlist=function_allowlist, + module_allowlist=module_allowlist, + 
source_allowlist=source_allowlist, ), ) diff --git a/src/cli/onefuzz/templates/libfuzzer.py b/src/cli/onefuzz/templates/libfuzzer.py index d842176ad8..fc59842a25 100644 --- a/src/cli/onefuzz/templates/libfuzzer.py +++ b/src/cli/onefuzz/templates/libfuzzer.py @@ -68,7 +68,9 @@ def _create_tasks( check_fuzzer_help: bool = True, expect_crash_on_failure: bool = False, minimized_stack_depth: Optional[int] = None, - coverage_filter: Optional[str] = None, + function_allowlist: Optional[str] = None, + module_allowlist: Optional[str] = None, + source_allowlist: Optional[str] = None, analyzer_exe: Optional[str] = None, analyzer_options: Optional[List[str]] = None, analyzer_env: Optional[Dict[str, str]] = None, @@ -218,7 +220,9 @@ def _create_tasks( debug=debug, colocate=colocate_all_tasks or colocate_secondary_tasks, check_fuzzer_help=check_fuzzer_help, - coverage_filter=coverage_filter, + function_allowlist=function_allowlist, + module_allowlist=module_allowlist, + source_allowlist=source_allowlist, ) report_containers = [ @@ -323,7 +327,9 @@ def basic( check_fuzzer_help: bool = True, expect_crash_on_failure: bool = False, minimized_stack_depth: Optional[int] = None, - coverage_filter: Optional[File] = None, + function_allowlist: Optional[File] = None, + module_allowlist: Optional[File] = None, + source_allowlist: Optional[File] = None, analyzer_exe: Optional[str] = None, analyzer_options: Optional[List[str]] = None, analyzer_env: Optional[Dict[str, str]] = None, @@ -396,12 +402,26 @@ def basic( target_exe_blob_name = helper.setup_relative_blob_name(target_exe, setup_dir) - if coverage_filter: - coverage_filter_blob_name: Optional[str] = helper.setup_relative_blob_name( - coverage_filter, setup_dir + if function_allowlist: + function_allowlist_blob_name: Optional[ + str + ] = helper.setup_relative_blob_name(function_allowlist, setup_dir) + else: + function_allowlist_blob_name = None + + if module_allowlist: + module_allowlist_blob_name: Optional[str] = 
helper.setup_relative_blob_name( + module_allowlist, setup_dir + ) + else: + module_allowlist_blob_name = None + + if source_allowlist: + source_allowlist_blob_name: Optional[str] = helper.setup_relative_blob_name( + source_allowlist, setup_dir ) else: - coverage_filter_blob_name = None + source_allowlist_blob_name = None self._create_tasks( job=helper.job, @@ -425,7 +445,9 @@ def basic( check_fuzzer_help=check_fuzzer_help, expect_crash_on_failure=expect_crash_on_failure, minimized_stack_depth=minimized_stack_depth, - coverage_filter=coverage_filter_blob_name, + function_allowlist=function_allowlist_blob_name, + module_allowlist=module_allowlist_blob_name, + source_allowlist=source_allowlist_blob_name, analyzer_exe=analyzer_exe, analyzer_options=analyzer_options, analyzer_env=analyzer_env, diff --git a/src/pytypes/onefuzztypes/enums.py b/src/pytypes/onefuzztypes/enums.py index 8269c37e72..2d9f4c9c4d 100644 --- a/src/pytypes/onefuzztypes/enums.py +++ b/src/pytypes/onefuzztypes/enums.py @@ -81,6 +81,9 @@ class TaskFeature(Enum): report_list = "report_list" minimized_stack_depth = "minimized_stack_depth" coverage_filter = "coverage_filter" + function_allowlist = "function_allowlist" + module_allowlist = "module_allowlist" + source_allowlist = "source_allowlist" target_must_use_input = "target_must_use_input" target_assembly = "target_assembly" target_class = "target_class" diff --git a/src/pytypes/onefuzztypes/models.py b/src/pytypes/onefuzztypes/models.py index fd5493c2e7..3f5760e1a3 100644 --- a/src/pytypes/onefuzztypes/models.py +++ b/src/pytypes/onefuzztypes/models.py @@ -163,6 +163,9 @@ class TaskDetails(BaseModel): report_list: Optional[List[str]] minimized_stack_depth: Optional[int] coverage_filter: Optional[str] + function_allowlist: Optional[str] + module_allowlist: Optional[str] + source_allowlist: Optional[str] target_assembly: Optional[str] target_class: Optional[str] target_method: Optional[str] @@ -385,6 +388,9 @@ class TaskUnitConfig(BaseModel): 
report_list: Optional[List[str]] minimized_stack_depth: Optional[int] coverage_filter: Optional[str] + function_allowlist: Optional[str] + module_allowlist: Optional[str] + source_allowlist: Optional[str] target_assembly: Optional[str] target_class: Optional[str] target_method: Optional[str]