Skip to content

Commit

Permalink
Strip html comments from markdown (#8667)
Browse files (browse the repository at this point in the history)
  • Loading branch information
CyberAndrii authored Jul 13, 2021
1 parent 2aebc2d commit 7557469
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 4 deletions.
13 changes: 9 additions & 4 deletions src/NuGetGallery/Services/MarkdownService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ public class MarkdownService : IMarkdownService
// Match timeout shared by every pattern below, so that a single regex evaluation
// cannot run for longer than one minute.
private static readonly TimeSpan RegexTimeout = TimeSpan.FromMinutes(1);
// Matches a '>' preceded by at most three spaces at the start of any line
// (Multiline makes '^' anchor at every line start, not just the start of input).
// NOTE(review): the name suggests this targets the HTML-encoded marker ("&gt;");
// the literal shown here may have been entity-decoded when the page was captured —
// confirm against the repository.
private static readonly Regex EncodedBlockQuotePattern = new Regex("^ {0,3}>", RegexOptions.Multiline, RegexTimeout);
// Matches "<a href=" followed by a single or double quote, then lazily consumes
// characters up to the next occurrence of that same quote (backreference \1).
private static readonly Regex LinkPattern = new Regex("<a href=([\"\']).*?\\1", RegexOptions.None, RegexTimeout);
// Matches an HTML comment from "<!--" to the nearest "-->". Singleline lets '.'
// match newlines, so comments that span several lines are matched as one unit.
private static readonly Regex HtmlCommentPattern = new Regex("<!--.*?-->", RegexOptions.Singleline, RegexTimeout);

private readonly IFeatureFlagService _features;
private readonly IImageDomainValidator _imageDomainValidator;
Expand Down Expand Up @@ -82,10 +83,12 @@ private RenderedMarkdownResult GetHtmlFromMarkdownCommonMark(string markdownStri
ImageSourceDisallowed = false
};

var readmeWithoutBom = markdownString.StartsWith("\ufeff") ? markdownString.Replace("\ufeff", "") : markdownString;
var markdownWithoutComments = HtmlCommentPattern.Replace(markdownString, "");

var markdownWithoutBom = markdownWithoutComments.StartsWith("\ufeff") ? markdownWithoutComments.Replace("\ufeff", "") : markdownWithoutComments;

// HTML encode markdown, except for block quotes, to block inline html.
var encodedMarkdown = EncodedBlockQuotePattern.Replace(HttpUtility.HtmlEncode(readmeWithoutBom), "> ");
var encodedMarkdown = EncodedBlockQuotePattern.Replace(HttpUtility.HtmlEncode(markdownWithoutBom), "> ");

var settings = CommonMarkSettings.Default.Clone();
settings.RenderSoftLineBreaksAsLineBreaks = true;
Expand Down Expand Up @@ -189,7 +192,9 @@ private RenderedMarkdownResult GetHtmlFromMarkdownMarkdig(string markdownString,
ImageSourceDisallowed = false
};

var readmeWithoutBom = markdownString.TrimStart('\ufeff');
var markdownWithoutComments = HtmlCommentPattern.Replace(markdownString, "");

var markdownWithoutBom = markdownWithoutComments.TrimStart('\ufeff');

var pipeline = new MarkdownPipelineBuilder()
.UseGridTables()
Expand All @@ -208,7 +213,7 @@ private RenderedMarkdownResult GetHtmlFromMarkdownMarkdig(string markdownString,
var renderer = new HtmlRenderer(htmlWriter);
pipeline.Setup(renderer);

var document = Markdown.Parse(readmeWithoutBom, pipeline);
var document = Markdown.Parse(markdownWithoutBom, pipeline);

foreach (var node in document.Descendants())
{
Expand Down
2 changes: 2 additions & 0 deletions tests/NuGetGallery.Facts/Services/MarkdownServiceFacts.cs
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ public void EncodesHtmlInMarkdownWithAdaptiveHeader(string originalMd, string ex
[Theory]
[InlineData("# Heading", "<h2>Heading</h2>", false, true)]
[InlineData("# Heading", "<h2>Heading</h2>", false, false)]
[InlineData("<!-- foo --> <!-- foo \n bar --> baz", "<p>baz</p>", false, true)]
[InlineData("<!-- foo --> <!-- foo \n bar --> baz", "<p>baz</p>", false, false)]
[InlineData("\ufeff# Heading with BOM", "<h2>Heading with BOM</h2>", false, true)]
[InlineData("\ufeff# Heading with BOM", "<h2>Heading with BOM</h2>", false, false)]
[InlineData("- List", "<ul>\n<li>List</li>\n</ul>", false, true)]
Expand Down

0 comments on commit 7557469

Please sign in to comment.