Skip to content

Commit

Permalink
V15: Rich Text Editor links do not work with query strings and anchors (
Browse files Browse the repository at this point in the history
#17288)

* fix: anchors and query strings do not work

Since the change from UDIs to localLinks in href, the pattern matched a little too much in the href section completely ignoring any "extras" such as querystrings and anchors after the locallink, which meant that the locallink did not get replaced at all if they were present. This is fixed by limiting the regexp a bit.

* fix: legacy links do not follow the same regexp as new links

Because we are no longer matching the whole `href` attribute but only some of its contents, we need to fix up the old pattern. It has been extended with matching groups that follow the same pattern as the new links.

* feat: allow a-tags to be multiline

example:

```html
<a
  type="document"
  href="/{localLink:<GUID>}">
Test
</a>
```

* fix: split regex into two parts: first a tokenizer for a-tags and then a type-finder

* fix: ensure only "document" and "media" are matching to speed up the pattern

* feat: allow a-tags to be multiline

(cherry picked from commit 35e8f2e)
  • Loading branch information
iOvergaard committed Oct 17, 2024
1 parent 2b3a917 commit 1581eb6
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 43 deletions.
76 changes: 34 additions & 42 deletions src/Umbraco.Core/Templates/HtmlLocalLinkParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,15 @@ public sealed class HtmlLocalLinkParser
// <a type="media" href="/{localLink:7e21a725-b905-4c5f-86dc-8c41ec116e39}" title="media">media</a>
// <a type="document" href="/{localLink:eed5fc6b-96fd-45a5-a0f1-b1adfb483c2f}" title="other page">other page</a>
internal static readonly Regex LocalLinkTagPattern = new(
@"<a\s+(?:(?:(?:type=['""](?<type>document|media)['""].*?(?<locallink>href=[""']/{localLink:(?<guid>[a-fA-F0-9-]+)})[""'])|((?<locallink>href=[""']/{localLink:(?<guid>[a-fA-F0-9-]+)})[""'].*?type=(['""])(?<type>document|media)(?:['""])))|(?:(?:type=['""](?<type>document|media)['""])|(?:(?<locallink>href=[""']/{localLink:[a-fA-F0-9-]+})[""'])))[^>]*>",
@"<a.+?href=['""](?<locallink>\/?{localLink:(?<guid>[a-fA-F0-9-]+)})[^>]*?>",
RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline);

internal static readonly Regex TypePattern = new(
"""type=['"](?<type>(?:media|document))['"]""",
RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace);

internal static readonly Regex LocalLinkPattern = new(
@"href=""[/]?(?:\{|\%7B)localLink:([a-zA-Z0-9-://]+)(?:\}|\%7D)",
@"href=['""](?<locallink>\/?(?:\{|\%7B)localLink:(?<guid>[a-zA-Z0-9-://]+)(?:\}|\%7D))",
RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace);

private readonly IPublishedUrlProvider _publishedUrlProvider;
Expand Down Expand Up @@ -84,32 +88,28 @@ public string EnsureInternalLinks(string text)
{
if (tagData.Udi is not null)
{
var newLink = "#";
if (tagData.Udi?.EntityType == Constants.UdiEntityType.Document)
{
newLink = _publishedUrlProvider.GetUrl(tagData.Udi.Guid);
}
else if (tagData.Udi?.EntityType == Constants.UdiEntityType.Media)
var newLink = tagData.Udi?.EntityType switch
{
newLink = _publishedUrlProvider.GetMediaUrl(tagData.Udi.Guid);
}

Constants.UdiEntityType.Document => _publishedUrlProvider.GetUrl(tagData.Udi.Guid),
Constants.UdiEntityType.Media => _publishedUrlProvider.GetMediaUrl(tagData.Udi.Guid),
_ => ""

Check failure on line 95 in src/Umbraco.Core/Templates/HtmlLocalLinkParser.cs

View workflow job for this annotation

GitHub Actions / CodeQL-Build

Check failure on line 95 in src/Umbraco.Core/Templates/HtmlLocalLinkParser.cs

View workflow job for this annotation

GitHub Actions / CodeQL-Build

Check failure on line 95 in src/Umbraco.Core/Templates/HtmlLocalLinkParser.cs

View check run for this annotation

Azure Pipelines / Umbraco CMS 9+ (Build Build Umbraco CMS)

src/Umbraco.Core/Templates/HtmlLocalLinkParser.cs#L95

src/Umbraco.Core/Templates/HtmlLocalLinkParser.cs(95,26): Error SA1122: Use string.Empty for empty strings (https://github.com/DotNetAnalyzers/StyleCopAnalyzers/blob/master/documentation/SA1122.md)

Check failure on line 95 in src/Umbraco.Core/Templates/HtmlLocalLinkParser.cs

View check run for this annotation

Azure Pipelines / Umbraco CMS 9+

src/Umbraco.Core/Templates/HtmlLocalLinkParser.cs#L95

src/Umbraco.Core/Templates/HtmlLocalLinkParser.cs(95,26): Error SA1122: Use string.Empty for empty strings (https://github.com/DotNetAnalyzers/StyleCopAnalyzers/blob/master/documentation/SA1122.md)

Check failure on line 95 in src/Umbraco.Core/Templates/HtmlLocalLinkParser.cs

View check run for this annotation

Azure Pipelines / Nightly E2E Test Umbraco CMS 14 (Build Build Umbraco CMS)

src/Umbraco.Core/Templates/HtmlLocalLinkParser.cs#L95

src/Umbraco.Core/Templates/HtmlLocalLinkParser.cs(95,26): Error SA1122: Use string.Empty for empty strings (https://github.com/DotNetAnalyzers/StyleCopAnalyzers/blob/master/documentation/SA1122.md)

Check failure on line 95 in src/Umbraco.Core/Templates/HtmlLocalLinkParser.cs

View check run for this annotation

Azure Pipelines / Nightly E2E Test Umbraco CMS 14

src/Umbraco.Core/Templates/HtmlLocalLinkParser.cs#L95

src/Umbraco.Core/Templates/HtmlLocalLinkParser.cs(95,26): Error SA1122: Use string.Empty for empty strings (https://github.com/DotNetAnalyzers/StyleCopAnalyzers/blob/master/documentation/SA1122.md)
};

text = StripTypeAttributeFromTag(text, tagData.Udi!.EntityType);
text = text.Replace(tagData.TagHref, "href=\"" + newLink);
text = text.Replace(tagData.TagHref, newLink);
}
else if (tagData.IntId.HasValue)
{
var newLink = _publishedUrlProvider.GetUrl(tagData.IntId.Value);
text = text.Replace(tagData.TagHref, "href=\"" + newLink);
text = text.Replace(tagData.TagHref, newLink);
}
}

return text;
}

// under normal circumstances, the type attribute is preceded by a space
// to cover the rare occasion where it isn't, we first replace with a a space and then without.
// to cover the rare occasion where it isn't, we first replace with a space and then without.
private string StripTypeAttributeFromTag(string tag, string type) =>
tag.Replace($" type=\"{type}\"", string.Empty)
.Replace($"type=\"{type}\"", string.Empty);
Expand All @@ -119,21 +119,22 @@ private IEnumerable<LocalLinkTag> FindLocalLinkIds(string text)
MatchCollection localLinkTagMatches = LocalLinkTagPattern.Matches(text);
foreach (Match linkTag in localLinkTagMatches)
{
if (linkTag.Groups.Count < 1)
if (Guid.TryParse(linkTag.Groups["guid"].Value, out Guid guid) is false)
{
continue;
}

if (Guid.TryParse(linkTag.Groups["guid"].Value, out Guid guid) is false)
// Find the type attribute
Match typeMatch = TypePattern.Match(linkTag.Value);
if (typeMatch.Success is false)
{
continue;
}

yield return new LocalLinkTag(
null,
new GuidUdi(linkTag.Groups["type"].Value, guid),
linkTag.Groups["locallink"].Value,
linkTag.Value);
new GuidUdi(typeMatch.Groups["type"].Value, guid),
linkTag.Groups["locallink"].Value);
}

// also return legacy results for values that have not been migrated
Expand All @@ -150,25 +151,26 @@ private IEnumerable<LocalLinkTag> FindLegacyLocalLinkIds(string text)
MatchCollection tags = LocalLinkPattern.Matches(text);
foreach (Match tag in tags)
{
if (tag.Groups.Count > 0)
if (tag.Groups.Count <= 0)
{
var id = tag.Groups[1].Value; // .Remove(tag.Groups[1].Value.Length - 1, 1);
continue;
}

// The id could be an int or a UDI
if (UdiParser.TryParse(id, out Udi? udi))
{
var guidUdi = udi as GuidUdi;
if (guidUdi is not null)
{
yield return new LocalLinkTag(null, guidUdi, tag.Value, null);
}
}
var id = tag.Groups["guid"].Value;

if (int.TryParse(id, NumberStyles.Integer, CultureInfo.InvariantCulture, out var intId))
// The id could be an int or a UDI
if (UdiParser.TryParse(id, out Udi? udi))
{
if (udi is GuidUdi guidUdi)
{
yield return new LocalLinkTag (intId, null, tag.Value, null);
yield return new LocalLinkTag(null, guidUdi, tag.Groups["locallink"].Value);
}
}

if (int.TryParse(id, NumberStyles.Integer, CultureInfo.InvariantCulture, out var intId))
{
yield return new LocalLinkTag (intId, null, tag.Groups["locallink"].Value);
}
}
}

Expand All @@ -181,20 +183,10 @@ public LocalLinkTag(int? intId, GuidUdi? udi, string tagHref)
TagHref = tagHref;
}

public LocalLinkTag(int? intId, GuidUdi? udi, string tagHref, string? fullTag)
{
IntId = intId;
Udi = udi;
TagHref = tagHref;
FullTag = fullTag;
}

public int? IntId { get; }

public GuidUdi? Udi { get; }

public string TagHref { get; }

public string? FullTag { get; }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,34 @@ public void Returns_Udis_From_Legacy_And_Current_LocalLinks()
[TestCase(
"<a href=\"/{localLink:9931BDE0-AAC3-4BAB-B838-909A7B47570E}\" title=\"world\"type=\"media\">world</a>",
"<a href=\"/media/1001/my-image.jpg\" title=\"world\">world</a>")]
[TestCase(
"<p><a type=\"document\" href=\"/{localLink:9931BDE0-AAC3-4BAB-B838-909A7B47570E}\" title=\"world\">world</a></p><p><a href=\"/{localLink:7e21a725-b905-4c5f-86dc-8c41ec116e39}\" title=\"world\" type=\"media\">world</a></p>",
"<p><a href=\"/my-test-url\" title=\"world\">world</a></p><p><a href=\"/media/1001/my-image.jpg\" title=\"world\">world</a></p>")]

// attributes order should not matter
[TestCase(
"<a rel=\"noopener\" title=\"world\" type=\"document\" href=\"/{localLink:9931BDE0-AAC3-4BAB-B838-909A7B47570E}\">world</a>",
"<a rel=\"noopener\" title=\"world\" href=\"/my-test-url\">world</a>")]
[TestCase(
"<a rel=\"noopener\" title=\"world\" href=\"/{localLink:9931BDE0-AAC3-4BAB-B838-909A7B47570E}\" type=\"document\">world</a>",
"<a rel=\"noopener\" title=\"world\" href=\"/my-test-url\">world</a>")]
[TestCase(
"<a rel=\"noopener\" title=\"world\" href=\"/{localLink:9931BDE0-AAC3-4BAB-B838-909A7B47570E}#anchor\" type=\"document\">world</a>",
"<a rel=\"noopener\" title=\"world\" href=\"/my-test-url#anchor\">world</a>")]

// anchors and query strings
[TestCase(
"<a type=\"document\" href=\"/{localLink:9931BDE0-AAC3-4BAB-B838-909A7B47570E}#anchor\" title=\"world\">world</a>",
"<a href=\"/my-test-url#anchor\" title=\"world\">world</a>")]
[TestCase(
"<a type=\"document\" href=\"/{localLink:9931BDE0-AAC3-4BAB-B838-909A7B47570E}?v=1\" title=\"world\">world</a>",
"<a href=\"/my-test-url?v=1\" title=\"world\">world</a>")]

// custom type ignored
[TestCase(
"<a type=\"custom\" href=\"/{localLink:9931BDE0-AAC3-4BAB-B838-909A7B47570E}\" title=\"world\">world</a>",
"<a type=\"custom\" href=\"/{localLink:9931BDE0-AAC3-4BAB-B838-909A7B47570E}\" title=\"world\">world</a>")]

// legacy
[TestCase(
"hello href=\"{localLink:1234}\" world ",
Expand All @@ -129,17 +157,23 @@ public void Returns_Udis_From_Legacy_And_Current_LocalLinks()
[TestCase(
"hello href=\"{localLink:umb://document/9931BDE0AAC34BABB838909A7B47570E}\" world ",
"hello href=\"/my-test-url\" world ")]
[TestCase(
"hello href=\"{localLink:umb://document/9931BDE0AAC34BABB838909A7B47570E}#anchor\" world ",
"hello href=\"/my-test-url#anchor\" world ")]
[TestCase(
"hello href=\"{localLink:umb://media/9931BDE0AAC34BABB838909A7B47570E}\" world ",
"hello href=\"/media/1001/my-image.jpg\" world ")]
[TestCase(
"hello href='{localLink:umb://media/9931BDE0AAC34BABB838909A7B47570E}' world ",
"hello href='/media/1001/my-image.jpg' world ")]

// This one has an invalid char so won't match.
[TestCase(
"hello href=\"{localLink:umb^://document/9931BDE0-AAC3-4BAB-B838-909A7B47570E}\" world ",
"hello href=\"{localLink:umb^://document/9931BDE0-AAC3-4BAB-B838-909A7B47570E}\" world ")]
[TestCase(
"hello href=\"{localLink:umb://document-type/9931BDE0-AAC3-4BAB-B838-909A7B47570E}\" world ",
"hello href=\"#\" world ")]
"hello href=\"\" world ")]
public void ParseLocalLinks(string input, string result)
{
// setup a mock URL provider which we'll use for testing
Expand Down

0 comments on commit 1581eb6

Please sign in to comment.