From e4346b778d2fcad3ec573bcdf0b9c1c717657caa Mon Sep 17 00:00:00 2001
From: Bowei Zhang
Date: Mon, 11 Nov 2024 14:15:30 -0800
Subject: [PATCH] fix: better handling of hyperlinks with parentheses

Replace the single catch-all rule for URLs not wrapped in brackets with
two rules, listed in this order:

1. CLOSING_PARENTHESIS_HYPERLINK_PATTERN matches a URL that contains an
   opening parenthesis and ends with a closing parenthesis, provided the
   closing parenthesis is followed by whitespace, the end of the text,
   or a character other than `_`, `-`, `/` or an ASCII alphanumeric.
2. GENERIC_HYPERLINK_PATTERN matches a URL whose last character is `_`,
   `-`, `/` or an ASCII alphanumeric.

Both patterns are public constants in termwiz/src/hyperlink.rs and are
used by the default hyperlink rules and by the new unit test.

In both character classes the `-` is written last so that it is a
literal hyphen and not a range operator.

---
 config/src/config.rs     |  8 ++++---
 termwiz/src/hyperlink.rs | 50 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 55 insertions(+), 3 deletions(-)

diff --git a/config/src/config.rs b/config/src/config.rs
index 113cdbfba4c..644c2def21f 100644
--- a/config/src/config.rs
+++ b/config/src/config.rs
@@ -1666,9 +1666,11 @@ pub fn default_hyperlink_rules() -> Vec {
         hyperlink::Rule::with_highlight(r"\((\w+://\S+)\)", "$1", 1).unwrap(),
         hyperlink::Rule::with_highlight(r"\[(\w+://\S+)\]", "$1", 1).unwrap(),
         hyperlink::Rule::with_highlight(r"<(\w+://\S+)>", "$1", 1).unwrap(),
-        // Then handle URLs not wrapped in brackets
-        // and include terminating ), / or - characters, if any
-        hyperlink::Rule::new(r"\b\w+://\S+[)/a-zA-Z0-9-]+", "$0").unwrap(),
+        // Then handle URLs not wrapped in brackets that
+        // 1) have a balanced ending parenthesis or
+        hyperlink::Rule::new(hyperlink::CLOSING_PARENTHESIS_HYPERLINK_PATTERN, "$0").unwrap(),
+        // 2) include terminating _, / or - characters, if any
+        hyperlink::Rule::new(hyperlink::GENERIC_HYPERLINK_PATTERN, "$0").unwrap(),
         // implicit mailto link
         hyperlink::Rule::new(r"\b\w+@[\w-]+(\.[\w-]+)+\b", "mailto:$0").unwrap(),
     ]
diff --git a/termwiz/src/hyperlink.rs b/termwiz/src/hyperlink.rs
index 159298f7cde..24749ae77e2 100644
--- a/termwiz/src/hyperlink.rs
+++ b/termwiz/src/hyperlink.rs
@@ -265,6 +265,9 @@ impl<'t> Match<'t> {
         result
     }
 }
+pub const CLOSING_PARENTHESIS_HYPERLINK_PATTERN: &str =
+    r"\b\w+://[^\s()]*\(\S*\)(?=\s|$|[^_/a-zA-Z0-9-])";
+pub const GENERIC_HYPERLINK_PATTERN: &str = r"\b\w+://\S+[_/a-zA-Z0-9-]";
 
 impl Rule {
     /// Construct a new rule. It may fail if the regex is invalid.
@@ -347,4 +350,51 @@ mod test {
             ]
         );
     }
+
+    #[test]
+    fn parse_with_parentheses() {
+        fn assert_helper(test_uri: &str, expected_uri: &str, msg: &str) {
+            let rules = vec![
+                Rule::new(CLOSING_PARENTHESIS_HYPERLINK_PATTERN, "$0").unwrap(),
+                Rule::new(GENERIC_HYPERLINK_PATTERN, "$0").unwrap(),
+            ];
+
+            assert_eq!(
+                Rule::match_hyperlinks(test_uri, &rules)[0].link.uri,
+                expected_uri,
+                "{}",
+                msg,
+            );
+        }
+
+        assert_helper(
+            " http://example.com)",
+            "http://example.com",
+            "Unbalanced terminating parenthesis should not be captured.",
+        );
+
+        assert_helper(
+            "http://example.com/(complete_parentheses)",
+            "http://example.com/(complete_parentheses)",
+            "Balanced terminating parenthesis should be captured.",
+        );
+
+        assert_helper(
+            "http://example.com/(complete_parentheses)>",
+            "http://example.com/(complete_parentheses)",
+            "Non-URL characters after a balanced terminating parenthesis should be dropped.",
+        );
+
+        assert_helper(
+            "http://example.com/(complete_parentheses))",
+            "http://example.com/(complete_parentheses))",
+            "Non-terminating parentheses should not impact matching the entire URL - Terminated with )",
+        );
+
+        assert_helper(
+            "http://example.com/(complete_parentheses)-((-)-()-_-",
+            "http://example.com/(complete_parentheses)-((-)-()-_-",
+            "Non-terminating parentheses should not impact matching the entire URL - Terminated with a valid character",
+        );
+    }
 }