diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ac2ace475..5b1e9f644 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -27,6 +27,7 @@ jobs: - nightly - macos - win-msvc + - win-msvc (nightly) - win-gnu include: - build: pinned @@ -55,6 +56,9 @@ jobs: - build: win-msvc os: windows-latest rust: stable + - build: win-msvc (nightly) + os: windows-latest + rust: nightly - build: win-gnu os: windows-latest rust: stable-x86_64-gnu @@ -154,6 +158,13 @@ jobs: run: | cargo test --test default --no-default-features --features 'std pattern unicode-perl' + # The #[debugger_visualizer] attribute is currently gated behind an unstable feature flag. + # In order to test the visualizers for the regex crate, they have to be tested on a nightly build. + - if: matrix.build == 'win-msvc (nightly)' + name: Run tests with debugger_visualizer feature + run: | + cargo test --test visualizers --features 'debugger_visualizer' -- --test-threads=1 + rustfmt: name: rustfmt runs-on: ubuntu-18.04 diff --git a/Cargo.toml b/Cargo.toml index 82df4ad2f..75f78119b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -104,6 +104,9 @@ unstable = ["pattern"] # by default if the unstable feature is enabled. pattern = [] +# Enable to use the #[debugger_visualizer] attribute. +debugger_visualizer = [] + # For very fast prefix literal matching. [dependencies.aho-corasick] version = "0.7.18" @@ -132,6 +135,9 @@ rand = { version = "0.8.3", default-features = false, features = ["getrandom", " # See: https://github.com/rust-lang/regex/issues/684 # See: https://github.com/rust-lang/regex/issues/685 # doc-comment = "0.3" +# To test debugger visualizers defined for the regex crate such as regex.natvis +debugger_test = "0.1.0" +debugger_test_parser = "0.1.0" # Run the test suite on the default behavior of Regex::new. 
# This includes a mish mash of NFAs and DFAs, which are chosen automatically @@ -184,6 +190,12 @@ name = "backtrack-bytes" path = "tests/test_crates_regex.rs" name = "crates-regex" +[[test]] +path = "tests/test_visualizers.rs" +name = "visualizers" +required-features = ["debugger_visualizer"] +test = false + [profile.release] debug = true diff --git a/HACKING.md b/HACKING.md index 34af5b517..6194e8821 100644 --- a/HACKING.md +++ b/HACKING.md @@ -271,6 +271,13 @@ invoking `cargo test`. Note that this variable is inspected at compile time, so if the tests don't seem to be running, you may need to run `cargo clean`. +This crate also supports defining and testing custom debugger visualizers. +The `#[debugger_visualizer]` attribute is currently unstable and behind a +`debugger_visualizer` feature gate. To test these visualizers, enable the +`debugger_visualizer` feature for this crate and run the `tests/test_visualizers.rs` +tests using the nightly toolchain. For more information on debugger visualizers, +see `debug_metadata/README.md`. + ## Benchmarking The benchmarking in this crate is made up of many micro-benchmarks. Currently, diff --git a/debug_metadata/README.md b/debug_metadata/README.md new file mode 100644 index 000000000..b11481df1 --- /dev/null +++ b/debug_metadata/README.md @@ -0,0 +1,111 @@ +## Debugger Visualizers + +Many languages and debuggers enable developers to control how a type is +displayed in a debugger. These are called "debugger visualizations" or "debugger +views". + +The Windows debuggers (WinDbg\CDB) support defining custom debugger visualizations using +the `Natvis` framework. To use Natvis, developers write XML documents using the natvis +schema that describe how debugger types should be displayed with the `.natvis` extension. +(See: https://docs.microsoft.com/en-us/visualstudio/debugger/create-custom-views-of-native-objects?view=vs-2019) +The Natvis files provide patterns which match type names and a description of how to display +those types. 
+ +The Natvis schema can be found either online (See: https://code.visualstudio.com/docs/cpp/natvis#_schema) +or locally at `<VS Installation Folder>\Xml\Schemas\1033\natvis.xsd`. + +The GNU debugger (GDB) supports defining custom debugger views using Pretty Printers. +Pretty printers are written as Python scripts that describe how a type should be displayed +when loaded up in GDB/LLDB. (See: https://sourceware.org/gdb/onlinedocs/gdb/Pretty-Printing.html#Pretty-Printing) +The pretty printers provide patterns, which match type names, and for matching +types, describe how to display those types. (For writing a pretty printer, see: https://sourceware.org/gdb/onlinedocs/gdb/Writing-a-Pretty_002dPrinter.html#Writing-a-Pretty_002dPrinter). + +### Embedding Visualizers + +Through the use of the currently unstable `#[debugger_visualizer]` attribute, the `regex` +crate can embed debugger visualizers into the crate metadata. + +Currently the two types of visualizers supported are Natvis and Pretty printers. + +For Natvis files, when linking an executable with a crate that includes Natvis files, +the MSVC linker will embed the contents of all Natvis files into the generated `PDB`. + +For pretty printers, the compiler will encode the contents of the pretty printer +in the `.debug_gdb_scripts` section of the `ELF` generated. + +### Testing Visualizers + +The `regex` crate supports testing debugger visualizers defined for this crate. The entry point for +these tests is `tests/test_visualizers.rs`. These tests are defined using the `debugger_test` and +`debugger_test_parser` crates. The `debugger_test` crate is a proc macro crate which defines a +single proc macro attribute, `#[debugger_test]`. For more detailed information about this crate, +see https://crates.io/crates/debugger_test. The CI pipeline for the `regex` crate has been updated +to run the debugger visualizer tests to ensure debugger visualizers do not become broken/stale. 
+ +The `#[debugger_test]` proc macro attribute may only be used on test functions and will run the +function under the debugger specified by the `debugger` meta item. + +This proc macro attribute has 3 required values: + +1. The first required meta item, `debugger`, takes a string value which specifies the debugger to launch. +2. The second required meta item, `commands`, takes a string containing a newline (`\n`) separated list of debugger +commands to run. +3. The third required meta item, `expected_statements`, takes a string containing a newline (`\n`) separated list of +statements that must exist in the debugger output. Pattern matching through regular expressions is also +supported by using the `pattern:` prefix for each expected statement. + +#### Example: + +```rust +#[debugger_test( + debugger = "cdb", + commands = "command1\ncommand2\ncommand3", + expected_statements = "statement1\nstatement2\nstatement3")] +fn test() { + +} +``` + +Using a multiline string is also supported, with a single debugger command/expected statement per line: + +```rust +#[debugger_test( + debugger = "cdb", + commands = " +command1 +command2 +command3", + expected_statements = " +statement1 +pattern:statement[0-9]+ +statement3")] +fn test() { + +} +``` + +In the example above, the second expected statement uses pattern matching through a regular expression +by using the `pattern:` prefix. + +#### Testing Locally + +Currently, only Natvis visualizations have been defined for the `regex` crate via `debug_metadata/regex.natvis`, +which means the `tests/test_visualizers.rs` tests need to be run on Windows using the `*-pc-windows-msvc` targets. +To run these tests locally, first ensure the debugging tools for Windows are installed or install them following +the steps listed here, [Debugging Tools for Windows](https://docs.microsoft.com/en-us/windows-hardware/drivers/debugger/). +Once the debugging tools have been installed, the tests can be run in the same manner as they are in the CI +pipeline. 
+ +#### Note + +The debugger visualizer tests in `tests/test_visualizers.rs` need to be run serially +and not in parallel. This can be achieved by passing the flag `--test-threads=1` to the test harness. This is due to +how the debugger tests are run. Each test marked with the `#[debugger_test]` attribute launches a debugger +and attaches it to the current test process. If tests are running in parallel, the test will try to attach +a debugger to the current process, which may already have a debugger attached, causing the test to fail. + +For example: + +``` +cargo test --test visualizers --features debugger_visualizer -- --test-threads=1 +``` diff --git a/debug_metadata/regex.natvis b/debug_metadata/regex.natvis new file mode 100644 index 000000000..2f8d30f2d --- /dev/null +++ b/debug_metadata/regex.natvis @@ -0,0 +1,105 @@ + + + + {{ text={__0.pats[0]} }} + + __0 + + + + + + + + + + + + + + + {{ named_groups={named_groups.ptr.pointer->data.base.table.table.items} }} + + text + named_groups + + + + + + + (char*)text.data_ptr+location(i),[location(i+1)-location(i)]s8 + i+=2 + index++ + + + + + + + {text.data_ptr+start,[end-start]s8} + + text + + {(char*)text.data_ptr+start,[end-start]s8} + + start,d + end,d + + + + + {{ text={__0.ro.ptr.pointer->data.res[0]} }} + + __0.ro + + + + + + + + + + + + + + + {{ named_groups={named_groups.ptr.pointer->data.base.table.table.items} }} + + text + named_groups + + + + + + + (char*)text.data_ptr+location(i),[location(i+1)-location(i)]s8 + i+=2 + index++ + + + + + + + {text.data_ptr+start,[end-start]s8} + + text + + {(char*)text.data_ptr+start,[end-start]s8} + + start,d + end,d + + + + + {{ text={__0.ro.ptr.pointer->data.res[0]} }} + + __0.ro + + + diff --git a/src/lib.rs b/src/lib.rs index 6b95739c5..04073329b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -610,6 +610,11 @@ another matching engine with fixed memory requirements. 
#![deny(missing_docs)] #![cfg_attr(feature = "pattern", feature(pattern))] +#![cfg_attr(feature = "debugger_visualizer", feature(debugger_visualizer))] +#![cfg_attr( + feature = "debugger_visualizer", + debugger_visualizer(natvis_file = "../debug_metadata/regex.natvis") +)] #![warn(missing_debug_implementations)] #[cfg(not(feature = "std"))] diff --git a/tests/debugger_visualizer.rs b/tests/debugger_visualizer.rs new file mode 100644 index 000000000..bfe732c9f --- /dev/null +++ b/tests/debugger_visualizer.rs @@ -0,0 +1,191 @@ +use debugger_test::debugger_test; + +#[inline(never)] +fn __break() {} + +#[debugger_test( + debugger = "cdb", + commands = r#" +.nvlist +dv +dx re +dx captures +g +dx m1 +dx m2 +dx m3 +dx m4 +"#, + expected_statements = r#" +re : { text="^(?P\d{4})-(?P\d{2})-(?P\d{2})$" } [Type: regex::re_unicode::Regex] +[] [Type: regex::re_unicode::Regex] +[Reference count] : 0x2 [Type: core::sync::atomic::AtomicUsize] +[Weak reference count] : 0x1 [Type: core::sync::atomic::AtomicUsize] +[+0xc00] res : { len=0x1 } [Type: alloc::vec::Vec] +[+0x000] nfa [Type: regex::prog::Program] +[+0x320] dfa [Type: regex::prog::Program] +[+0x640] dfa_reverse [Type: regex::prog::Program] +[+0x960] suffixes [Type: regex::literal::imp::LiteralSearcher] +[+0xc18] ac : None [Type: enum2$ > >] +[+0xda0] match_type : Dfa [Type: enum2$] + +captures : { named_groups=0x3 } [Type: regex::re_unicode::Captures] +[] [Type: regex::re_unicode::Captures] +[text] : "2020-10-15" [Type: str] +pattern:\[named_groups\] : \{ len=0x3 \} \[Type: .*\] +pattern:\[0\] : .* : "2020-10-15" \[Type: char \*\] +pattern:\[1\] : .* : "2020" \[Type: char \*\] +pattern:\[2\] : .* : "10" \[Type: char \*\] +pattern:\[3\] : .* : "15" \[Type: char \*\] + +m1 : "2020-10-15" [Type: regex::re_unicode::Match] +[] [Type: regex::re_unicode::Match] +[text] : "2020-10-15" [Type: str] +[match_text] : "2020-10-15" +[start] : 0 [Type: unsigned __int64] +[end] : 10 [Type: unsigned __int64] + +m2 : "2020" [Type: 
regex::re_unicode::Match] +[] [Type: regex::re_unicode::Match] +[text] : "2020-10-15" [Type: str] +[match_text] : "2020" +[start] : 0 [Type: unsigned __int64] +[end] : 4 [Type: unsigned __int64] + +m3 : "10" [Type: regex::re_unicode::Match] +[] [Type: regex::re_unicode::Match] +[text] : "2020-10-15" [Type: str] +[match_text] : "10" +[start] : 5 [Type: unsigned __int64] +[end] : 7 [Type: unsigned __int64] + +m4 : "15" [Type: regex::re_unicode::Match] +[] [Type: regex::re_unicode::Match] +[text] : "2020-10-15" [Type: str] +[match_text] : "15" +[start] : 8 [Type: unsigned __int64] +[end] : 10 [Type: unsigned __int64] +"# +)] +fn test_debugger_visualizer() { + let re = regex::Regex::new( + r"^(?P\d{4})-(?P\d{2})-(?P\d{2})$", + ) + .unwrap(); + let text = "2020-10-15"; + + let captures = re.captures(text).unwrap(); + let matches = captures + .iter() + .filter_map(|capture| capture) + .collect::>(); + assert_eq!(4, matches.len()); + __break(); // #break + + let m1 = matches[0]; + assert_eq!("2020-10-15", m1.as_str()); + + let m2 = matches[1]; + assert_eq!("2020", m2.as_str()); + + let m3 = matches[2]; + assert_eq!("10", m3.as_str()); + + let m4 = matches[3]; + assert_eq!("15", m4.as_str()); + __break(); // #break +} + +#[debugger_test( + debugger = "cdb", + commands = r#" +.nvlist +dv +dx re +dx captures +g +dx m1 +dx m2 +dx m3 +dx m4 +"#, + expected_statements = r#" +re : { text="^(?P\d{4})-(?P\d{2})-(?P\d{2})$" } [Type: regex::re_bytes::Regex] +[] [Type: regex::re_bytes::Regex] +[Reference count] : 0x2 [Type: core::sync::atomic::AtomicUsize] +[Weak reference count] : 0x1 [Type: core::sync::atomic::AtomicUsize] +[+0xc00] res : { len=0x1 } [Type: alloc::vec::Vec] +[+0x000] nfa [Type: regex::prog::Program] +[+0x320] dfa [Type: regex::prog::Program] +[+0x640] dfa_reverse [Type: regex::prog::Program] +[+0x960] suffixes [Type: regex::literal::imp::LiteralSearcher] +[+0xc18] ac : None [Type: enum2$ > >] +[+0xda0] match_type : Dfa [Type: enum2$] + +captures : { 
named_groups=0x3 } [Type: regex::re_bytes::Captures] +[] [Type: regex::re_bytes::Captures] +[text] : { len=0xa } [Type: slice$] +pattern:\[named_groups\] : \{ len=0x3 \} \[Type: .*\] +pattern:\[0\] : .* : "2020-10-15" \[Type: char \*\] +pattern:\[1\] : .* : "2020" \[Type: char \*\] +pattern:\[2\] : .* : "10" \[Type: char \*\] +pattern:\[3\] : .* : "15" \[Type: char \*\] + +m1 : "2020-10-15" [Type: regex::re_bytes::Match] +[] [Type: regex::re_bytes::Match] +[text] : { len=0xa } [Type: slice$] +[match_text] : "2020-10-15" +[start] : 0 [Type: unsigned __int64] +[end] : 10 [Type: unsigned __int64] + +m2 : "2020" [Type: regex::re_bytes::Match] +[] [Type: regex::re_bytes::Match] +[text] : { len=0xa } [Type: slice$] +[match_text] : "2020" +[start] : 0 [Type: unsigned __int64] +[end] : 4 [Type: unsigned __int64] + +m3 : "10" [Type: regex::re_bytes::Match] +[] [Type: regex::re_bytes::Match] +[text] : { len=0xa } [Type: slice$] +[match_text] : "10" +[start] : 5 [Type: unsigned __int64] +[end] : 7 [Type: unsigned __int64] + +m4 : "15" [Type: regex::re_bytes::Match] +[] [Type: regex::re_bytes::Match] +[text] : { len=0xa } [Type: slice$] +[match_text] : "15" +[start] : 8 [Type: unsigned __int64] +[end] : 10 [Type: unsigned __int64] +"# +)] +fn test_bytes_debugger_visualizer() { + let re = regex::bytes::Regex::new( + r"^(?P\d{4})-(?P\d{2})-(?P\d{2})$", + ) + .unwrap(); + let text = b"2020-10-15"; + + let captures = re.captures(text).unwrap(); + let matches = captures + .iter() + .filter_map(|capture| capture) + .collect::>(); + assert_eq!(4, matches.len()); + __break(); // #break + + let m1 = matches[0]; + assert_eq!(b"2020-10-15", m1.as_bytes()); + + let m2 = matches[1]; + assert_eq!(b"2020", m2.as_bytes()); + + let m3 = matches[2]; + assert_eq!(b"10", m3.as_bytes()); + + let m4 = matches[3]; + assert_eq!(b"15", m4.as_bytes()); + + __break(); // #break +} diff --git a/tests/test_visualizers.rs b/tests/test_visualizers.rs new file mode 100644 index 000000000..0e0167c7c --- 
/dev/null +++ b/tests/test_visualizers.rs @@ -0,0 +1,2 @@ +#[cfg(feature = "debugger_visualizer")] +mod debugger_visualizer;