From 8f1b85e2d1b678cf3f3c16cbdd21ccb5d85eb32c Mon Sep 17 00:00:00 2001 From: Olivier Nizet Date: Fri, 13 Sep 2024 21:53:02 +0200 Subject: [PATCH 1/8] New API to parse into the header and footer --- .../Expressions/AbbreviationExpression.cs | 4 + .../Expressions/BlockQuoteExpression.cs | 5 + .../Expressions/BodyExpression.cs | 12 +- .../Expressions/HyperlinkExpression.cs | 9 +- .../Expressions/Image/ImageExpression.cs | 2 +- .../Expressions/Image/ImageExpressionBase.cs | 8 +- src/Html2OpenXml/HtmlConverter.cs | 177 ++++++++++++++---- src/Html2OpenXml/IO/ImagePrefetcher.cs | 31 ++- src/Html2OpenXml/ParsingContext.cs | 11 +- 9 files changed, 202 insertions(+), 57 deletions(-) diff --git a/src/Html2OpenXml/Expressions/AbbreviationExpression.cs b/src/Html2OpenXml/Expressions/AbbreviationExpression.cs index 654f1fb3..e8345937 100644 --- a/src/Html2OpenXml/Expressions/AbbreviationExpression.cs +++ b/src/Html2OpenXml/Expressions/AbbreviationExpression.cs @@ -29,6 +29,10 @@ sealed class AbbreviationExpression(IHtmlElement node) : PhrasingElementExpressi /// public override IEnumerable Interpret(ParsingContext context) { + // Footnote or endnote are invalid inside header and footer + if (context.HostingPart is not MainDocumentPart) + return []; + var childElements = base.Interpret(context); // Transform the inline acronym/abbreviation to a reference to a foot note. 
diff --git a/src/Html2OpenXml/Expressions/BlockQuoteExpression.cs b/src/Html2OpenXml/Expressions/BlockQuoteExpression.cs index 18900c2c..d2b0e57a 100644 --- a/src/Html2OpenXml/Expressions/BlockQuoteExpression.cs +++ b/src/Html2OpenXml/Expressions/BlockQuoteExpression.cs @@ -13,6 +13,7 @@ using System.Linq; using AngleSharp.Html.Dom; using DocumentFormat.OpenXml; +using DocumentFormat.OpenXml.Packaging; using DocumentFormat.OpenXml.Wordprocessing; namespace HtmlToOpenXml.Expressions; @@ -26,6 +27,10 @@ sealed class BlockQuoteExpression(IHtmlElement node) : BlockElementExpression(no /// public override IEnumerable Interpret(ParsingContext context) { + // Footnote or endnote are invalid inside header and footer + if (context.HostingPart is not MainDocumentPart) + return []; + string? description = node.GetAttribute("cite"); var childElements = base.Interpret(context); diff --git a/src/Html2OpenXml/Expressions/BodyExpression.cs b/src/Html2OpenXml/Expressions/BodyExpression.cs index bff44623..b0a9addf 100644 --- a/src/Html2OpenXml/Expressions/BodyExpression.cs +++ b/src/Html2OpenXml/Expressions/BodyExpression.cs @@ -14,6 +14,7 @@ using AngleSharp.Dom; using AngleSharp.Html.Dom; using DocumentFormat.OpenXml; +using DocumentFormat.OpenXml.Packaging; using DocumentFormat.OpenXml.Wordprocessing; namespace HtmlToOpenXml.Expressions; @@ -35,6 +36,9 @@ protected override void ComposeStyles(ParsingContext context) { base.ComposeStyles(context); + var mainPart = context.MainPart as MainDocumentPart; + if (mainPart is null) return; + // Unsupported W3C attribute but claimed by users. Specified at level, the page // orientation is applied on the whole document string? 
attr = styleAttributes!["page-orientation"]; @@ -42,10 +46,10 @@ protected override void ComposeStyles(ParsingContext context) { PageOrientationValues orientation = Converter.ToPageOrientation(attr); - var sectionProperties = context.MainPart.Document.Body!.GetFirstChild(); + var sectionProperties = mainPart.Document.Body!.GetFirstChild(); if (sectionProperties == null || sectionProperties.GetFirstChild() == null) { - context.MainPart.Document.Body.Append(ChangePageOrientation(orientation)); + mainPart.Document.Body.Append(ChangePageOrientation(orientation)); } else { @@ -61,10 +65,10 @@ protected override void ComposeStyles(ParsingContext context) if (paraProperties.BiDi is not null) { - var sectionProperties = context.MainPart.Document.Body!.GetFirstChild(); + var sectionProperties = mainPart.Document.Body!.GetFirstChild(); if (sectionProperties == null || sectionProperties.GetFirstChild() == null) { - context.MainPart.Document.Body.Append(sectionProperties = new()); + mainPart.Document.Body.Append(sectionProperties = new()); } sectionProperties.AddChild(paraProperties.BiDi.CloneNode(true)); diff --git a/src/Html2OpenXml/Expressions/HyperlinkExpression.cs b/src/Html2OpenXml/Expressions/HyperlinkExpression.cs index 2e11edd6..aff74eaf 100644 --- a/src/Html2OpenXml/Expressions/HyperlinkExpression.cs +++ b/src/Html2OpenXml/Expressions/HyperlinkExpression.cs @@ -14,6 +14,7 @@ using System.Linq; using AngleSharp.Html.Dom; using DocumentFormat.OpenXml; +using DocumentFormat.OpenXml.Packaging; using DocumentFormat.OpenXml.Wordprocessing; namespace HtmlToOpenXml.Expressions; @@ -42,8 +43,10 @@ public override IEnumerable Interpret (ParsingContext context) // Let's see whether the link tag include an image inside its body. // If so, the Hyperlink OpenXmlElement is lost and we'll keep only the images // and applied a HyperlinkOnClick attribute. 
- var imagesInLink = childElements.Where(e => e.HasChild()); - if (imagesInLink.Any()) + IEnumerable imagesInLink; + // Clickable image is only supported in body but not in header/footer + if (context.HostingPart is MainDocumentPart && + (imagesInLink = childElements.Where(e => e.HasChild())).Any()) { foreach (var img in imagesInLink) { @@ -100,7 +103,7 @@ public override IEnumerable Interpret (ParsingContext context) // ensure the links does not start with javascript: else if (AngleSharpExtensions.TryParseUrl(att, UriKind.Absolute, out var uri)) { - var extLink = context.MainPart.AddHyperlinkRelationship(uri!, true); + var extLink = context.HostingPart.AddHyperlinkRelationship(uri!, true); h = new Hyperlink( ) { History = true, Id = extLink.Id }; diff --git a/src/Html2OpenXml/Expressions/Image/ImageExpression.cs b/src/Html2OpenXml/Expressions/Image/ImageExpression.cs index 52b3e2f1..be9bb9c6 100644 --- a/src/Html2OpenXml/Expressions/Image/ImageExpression.cs +++ b/src/Html2OpenXml/Expressions/Image/ImageExpression.cs @@ -59,7 +59,7 @@ class ImageExpression(IHtmlImageElement node) : ImageExpressionBase(node) var (imageObjId, drawingObjId) = IncrementDrawingObjId(context); - HtmlImageInfo? iinfo = context.Converter.ImagePrefetcher.Download(src, CancellationToken.None) + HtmlImageInfo? 
iinfo = context.ImageLoader.Download(src, CancellationToken.None) .ConfigureAwait(false).GetAwaiter().GetResult(); if (iinfo == null) diff --git a/src/Html2OpenXml/Expressions/Image/ImageExpressionBase.cs b/src/Html2OpenXml/Expressions/Image/ImageExpressionBase.cs index 170b0464..3cf0422c 100644 --- a/src/Html2OpenXml/Expressions/Image/ImageExpressionBase.cs +++ b/src/Html2OpenXml/Expressions/Image/ImageExpressionBase.cs @@ -12,6 +12,7 @@ using System.Collections.Generic; using System.Linq; using DocumentFormat.OpenXml; +using DocumentFormat.OpenXml.Packaging; using DocumentFormat.OpenXml.Wordprocessing; using a = DocumentFormat.OpenXml.Drawing; @@ -85,10 +86,11 @@ internal static (uint imageObjId, uint drawingObjId) IncrementDrawingObjId(Parsi drawingObjId ??= 1; // 1 is the minimum ID set by MS Office. imageObjId ??= 1; + var mainPart = context.MainPart; foreach (var part in new[] { - context.MainPart.Document.Body!.Descendants(), - context.MainPart.HeaderParts.Where(f => f.Header != null).SelectMany(f => f.Header.Descendants()), - context.MainPart.FooterParts.Where(f => f.Footer != null).SelectMany(f => f.Footer.Descendants()) + mainPart.Document.Body!.Descendants(), + mainPart.HeaderParts.Where(f => f.Header != null).SelectMany(f => f.Header.Descendants()), + mainPart.FooterParts.Where(f => f.Footer != null).SelectMany(f => f.Footer.Descendants()) }) foreach (Drawing d in part) { diff --git a/src/Html2OpenXml/HtmlConverter.cs b/src/Html2OpenXml/HtmlConverter.cs index 5b64b31b..356e8b72 100755 --- a/src/Html2OpenXml/HtmlConverter.cs +++ b/src/Html2OpenXml/HtmlConverter.cs @@ -28,8 +28,8 @@ namespace HtmlToOpenXml; public partial class HtmlConverter { private readonly MainDocumentPart mainPart; - /// Cache all the ImagePart processed to avoid downloading the same image. - private ImagePrefetcher? imagePrefetcher; + // Cache all the ImagePart processed to avoid downloading the same image + private IImageLoader? 
headerImageLoader, bodyImageLoader, footerImageLoader; private readonly WordDocumentStyle htmlStyles; private readonly IWebRequest webRequester; @@ -57,69 +57,112 @@ public HtmlConverter(MainDocumentPart mainPart, IWebRequest? webRequester = null } /// - /// Start the parse processing. + /// Parse some HTML content where the output is intented to be inserted in . /// /// The HTML content to parse /// Returns a list of parsed paragraph. public IList Parse(string html) { - return Parse(html, CancellationToken.None).ConfigureAwait(false).GetAwaiter().GetResult().ToList(); + bodyImageLoader ??= new ImagePrefetcher(mainPart, webRequester); + return ParseCoreAsync(html, mainPart, bodyImageLoader, + new ParallelOptions() { CancellationToken = CancellationToken.None }) + .ConfigureAwait(false).GetAwaiter().GetResult().ToList(); } /// - /// Start the parse processing. + /// Start the asynchroneous parse processing. /// /// The HTML content to parse /// The cancellation token. /// Returns a list of parsed paragraph. + [Obsolete("Use ParseAsync instead to respect naming convention")] public Task> Parse(string html, CancellationToken cancellationToken = default) { - return Parse(html, new ParallelOptions() { CancellationToken = cancellationToken }); + return ParseAsync(html, cancellationToken); } /// - /// Start the parse processing. Use this overload if you want to control the downloading of images. + /// Start the asynchroneous parse processing. + /// + /// The HTML content to parse + /// The cancellation token. + /// Returns a list of parsed paragraph. + public Task> ParseAsync(string html, CancellationToken cancellationToken = default) + { + return ParseAsync(html, new ParallelOptions { CancellationToken = cancellationToken }); + } + + /// + /// Start the asynchroneous parse processing. /// /// The HTML content to parse /// The configuration of parallelism while downloading the remote resources. /// Returns a list of parsed paragraph. 
- public async Task> Parse(string html, ParallelOptions parallelOptions) + public Task> ParseAsync(string html, ParallelOptions parallelOptions) { - if (string.IsNullOrWhiteSpace(html)) - return []; + bodyImageLoader ??= new ImagePrefetcher(mainPart, webRequester); - // ensure a body exists to avoid any errors when trying to access it - if (mainPart.Document == null) - new Document(new Body()).Save(mainPart); - else if (mainPart.Document.Body == null) - mainPart.Document.Body = new Body(); + return ParseCoreAsync(html, mainPart, bodyImageLoader, parallelOptions); + } - var browsingContext = BrowsingContext.New(); - var htmlDocument = await browsingContext.OpenAsync(req => req.Content(html), parallelOptions.CancellationToken); - if (htmlDocument == null) - return []; + /// + /// Parse asynchroneously the Html and append the output into the Header of the document. + /// + /// The HTML content to parse + /// The cancellation token. + /// + public async Task ParseHeader(string html, CancellationToken cancellationToken = default) + { + if (mainPart.HeaderParts is null) + mainPart.AddNewPart(); + var headerPart = mainPart.HeaderParts!.First(); + headerPart.Header ??= new(); + headerImageLoader ??= new ImagePrefetcher(headerPart, webRequester); - await PreloadImages(htmlDocument, parallelOptions).ConfigureAwait(false); + var paragraphs = await ParseCoreAsync(html, headerPart, headerImageLoader, + new ParallelOptions() { CancellationToken = cancellationToken }); - var parsingContext = new ParsingContext(this, mainPart); - var body = new Expressions.BodyExpression (htmlDocument.Body!); - var paragraphs = body.Interpret (parsingContext); - return paragraphs.Cast(); + foreach (var p in paragraphs) + headerPart.Header.AddChild(p); + } + + /// + /// Parse asynchroneously the Html and append the output into the Footer of the document. + /// + /// The HTML content to parse + /// The cancellation token. 
+ /// + public async Task ParseFooter(string html, CancellationToken cancellationToken = default) + { + if (mainPart.FooterParts is null) + mainPart.AddNewPart(); + var footerPart = mainPart.FooterParts!.First(); + footerPart.Footer ??= new(); + footerImageLoader ??= new ImagePrefetcher(footerPart, webRequester); + + var paragraphs = await ParseCoreAsync(html, footerPart, footerImageLoader, + new ParallelOptions() { CancellationToken = cancellationToken }); + + foreach (var p in paragraphs) + footerPart.Footer.AddChild(p); } /// - /// Start the parse processing and append the converted paragraphs into the Body of the document. + /// Parse asynchroneously the Html and append the output into the Body of the document. /// /// The HTML content to parse /// The cancellation token. - public async Task ParseHtml(string html, CancellationToken cancellationToken = default) + /// + public async Task ParseBody(string html, CancellationToken cancellationToken = default) { - // This method exists because we may ensure the SectionProperties remains the last element of the body. - // It's mandatory when dealing with page orientation + bodyImageLoader ??= new ImagePrefetcher(mainPart, webRequester); + var paragraphs = await ParseCoreAsync(html, mainPart, bodyImageLoader, + new ParallelOptions() { CancellationToken = cancellationToken }); - var paragraphs = await Parse(html, cancellationToken); + if (!paragraphs.Any()) + return; - Body body = mainPart.Document.Body!; + Body body = mainPart.Document!.Body!; SectionProperties? sectionProperties = body.GetLastChild(); foreach (var para in paragraphs) body.Append(para); @@ -141,6 +184,31 @@ public async Task ParseHtml(string html, CancellationToken cancellationToken = d } } + /// + /// Start the asynchroneous parse processing. Use this overload if you want to control the downloading of images. + /// + /// The HTML content to parse + /// The configuration of parallelism while downloading the remote resources. 
+ /// Returns a list of parsed paragraph. + [Obsolete("Use ParseAsync instead to respect naming convention")] + public Task> Parse(string html, ParallelOptions parallelOptions) + { + bodyImageLoader ??= new ImagePrefetcher(mainPart, webRequester); + + return ParseCoreAsync(html, mainPart, bodyImageLoader, parallelOptions); + } + + /// + /// Start the asynchroneous parse processing and append the output into the Body of the document. + /// + /// The HTML content to parse + /// The cancellation token. + [Obsolete("Use ParseBody instead for clarification")] + public Task ParseHtml(string html, CancellationToken cancellationToken = default) + { + return ParseBody(html, cancellationToken); + } + /// /// Refresh the cache of styles presents in the document. /// @@ -149,10 +217,49 @@ public void RefreshStyles() htmlStyles.PrepareStyles(mainPart); } + /// + /// Start the asynchroneous parse processing. Use this overload if you want to control the downloading of images. + /// + /// The HTML content to parse + /// The OpenXml container where the content will be inserted into. + /// The image resolver service linked to the . + /// The configuration of parallelism while downloading the remote resources. + /// Returns a list of parsed paragraph. 
+ private async Task> ParseCoreAsync(string html, + OpenXmlPartContainer hostingPart, IImageLoader imageLoader, + ParallelOptions parallelOptions) + { + if (string.IsNullOrWhiteSpace(html)) + return []; + + var browsingContext = BrowsingContext.New(); + var htmlDocument = await browsingContext.OpenAsync(req => req.Content(html), parallelOptions.CancellationToken).ConfigureAwait(false); + if (htmlDocument == null) + return []; + + if (mainPart.Document == null) + new Document(new Body()).Save(mainPart); + else if (mainPart.Document.Body == null) + mainPart.Document.Body = new Body(); + + await PreloadImages(htmlDocument, imageLoader, parallelOptions).ConfigureAwait(false); + + Expressions.HtmlDomExpression expression; + if (hostingPart is MainDocumentPart) + expression = new Expressions.BodyExpression(htmlDocument.Body!); + else + expression = new Expressions.BlockElementExpression(htmlDocument.Body!); + + var parsingContext = new ParsingContext(this, hostingPart, imageLoader); + var paragraphs = expression.Interpret(parsingContext); + return paragraphs.Cast(); + } + /// /// Walk through all the img tags and preload all the remote images. /// - private async Task PreloadImages(AngleSharp.Dom.IDocument htmlDocument, ParallelOptions parallelOptions) + private async Task PreloadImages(AngleSharp.Dom.IDocument htmlDocument, + IImageLoader imageLoader, ParallelOptions parallelOptions) { var imageUris = htmlDocument.QuerySelectorAll("img[src]") .Cast() @@ -162,7 +269,7 @@ private async Task PreloadImages(AngleSharp.Dom.IDocument htmlDocument, Parallel return; await imageUris.ForEachAsync( - async (img, cts) => await ImagePrefetcher.Download(img, cts), + async (img, cts) => await imageLoader.Download(img, cts), parallelOptions).ConfigureAwait(false); } @@ -216,10 +323,10 @@ public WordDocumentStyle HtmlStyles public bool ContinueNumbering { get; set; } = true; /// - /// Resolve a remote or inline image resource. 
+ /// Gets the mainDocumentPart of the destination OpenXml document. /// - internal ImagePrefetcher ImagePrefetcher + internal MainDocumentPart MainPart { - get => imagePrefetcher ??= new ImagePrefetcher(mainPart, webRequester); + get => mainPart; } } diff --git a/src/Html2OpenXml/IO/ImagePrefetcher.cs b/src/Html2OpenXml/IO/ImagePrefetcher.cs index f66a8223..93cf9518 100644 --- a/src/Html2OpenXml/IO/ImagePrefetcher.cs +++ b/src/Html2OpenXml/IO/ImagePrefetcher.cs @@ -18,10 +18,19 @@ namespace HtmlToOpenXml.IO; +interface IImageLoader +{ + /// + /// Download the remote or local image located at the specified url. + /// + Task Download(string imageUri, CancellationToken cancellationToken); +} + /// /// Download and provison the metadata of a requested image. /// -sealed class ImagePrefetcher +sealed class ImagePrefetcher : IImageLoader + where T: OpenXmlPartContainer, ISupportedRelationship { // Map extension to PartTypeInfo private static readonly Dictionary knownExtensions = new(StringComparer.OrdinalIgnoreCase) { @@ -40,14 +49,20 @@ sealed class ImagePrefetcher { ".tiff", ImagePartType.Tiff }, { ".wmf", ImagePartType.Wmf } }; - private readonly MainDocumentPart mainPart; + private readonly T hostingPart; private readonly IWebRequest resourceLoader; private readonly HtmlImageInfoCollection prefetchedImages; - public ImagePrefetcher(MainDocumentPart mainPart, IWebRequest resourceLoader) + /// + /// Constructor. + /// + /// The image will be linked to that hosting part. + /// Images are not shared between header, footer and body. + /// Service to resolve an image. 
+ public ImagePrefetcher(T hostingPart, IWebRequest resourceLoader) { - this.mainPart = mainPart; + this.hostingPart = hostingPart; this.resourceLoader = resourceLoader; this.prefetchedImages = new HtmlImageInfoCollection(); } @@ -107,7 +122,7 @@ public ImagePrefetcher(MainDocumentPart mainPart, IWebRequest resourceLoader) return null; } - var ipart = mainPart.AddImagePart(type); + var ipart = hostingPart.AddImagePart(type); using (var outputStream = ipart.GetStream(FileMode.Create)) { response.Content.CopyTo(outputStream); @@ -116,7 +131,7 @@ public ImagePrefetcher(MainDocumentPart mainPart, IWebRequest resourceLoader) info.Size = GetImageSize(outputStream); } - info.ImagePartId = mainPart.GetIdOfPart(ipart); + info.ImagePartId = hostingPart.GetIdOfPart(ipart); return info; } } @@ -130,7 +145,7 @@ public ImagePrefetcher(MainDocumentPart mainPart, IWebRequest resourceLoader) { Size size; knownContentType.TryGetValue(dataUri!.Mime, out PartTypeInfo type); - var ipart = mainPart.AddImagePart(type); + var ipart = hostingPart.AddImagePart(type); using (var outputStream = ipart.GetStream(FileMode.Create)) { outputStream.Write(dataUri.Data, 0, dataUri.Data.Length); @@ -140,7 +155,7 @@ public ImagePrefetcher(MainDocumentPart mainPart, IWebRequest resourceLoader) } return new HtmlImageInfo(src) { - ImagePartId = mainPart.GetIdOfPart(ipart), + ImagePartId = hostingPart.GetIdOfPart(ipart), Size = size }; } diff --git a/src/Html2OpenXml/ParsingContext.cs b/src/Html2OpenXml/ParsingContext.cs index a26bc9f4..18dd7657 100644 --- a/src/Html2OpenXml/ParsingContext.cs +++ b/src/Html2OpenXml/ParsingContext.cs @@ -20,14 +20,19 @@ namespace HtmlToOpenXml; /// Contains information that is global to the parsing. /// /// The list of paragraphs that will be returned. 
-sealed class ParsingContext(HtmlConverter converter, MainDocumentPart mainPart) +sealed class ParsingContext(HtmlConverter converter, OpenXmlPartContainer hostingPart, IO.IImageLoader imageLoader) { /// Shorthand for .HtmlStyles public WordDocumentStyle DocumentStyle { get => Converter.HtmlStyles; } public HtmlConverter Converter { get; } = converter; - public MainDocumentPart MainPart { get; } = mainPart; + public MainDocumentPart MainPart { get; } = converter.MainPart; + + public OpenXmlPartContainer HostingPart { get; } = hostingPart; + + public IO.IImageLoader ImageLoader { get; } = imageLoader; + private HtmlElementExpression? parentExpression; private ParsingContext? parentContext; @@ -49,7 +54,7 @@ public void CascadeStyles (OpenXmlElement element) public ParsingContext CreateChild(HtmlElementExpression expression) { - var childContext = new ParsingContext(Converter, MainPart) { + var childContext = new ParsingContext(Converter, HostingPart, ImageLoader) { propertyBag = propertyBag, parentExpression = expression, parentContext = this From bf4db47712644a8b2a98bcce58f3629cc459c848 Mon Sep 17 00:00:00 2001 From: Olivier Nizet Date: Mon, 16 Sep 2024 17:05:59 +0200 Subject: [PATCH 2/8] Amend unit test to cover these new API endpoints --- examples/Demo/Program.cs | 9 ++- src/Html2OpenXml/HtmlConverter.cs | 15 ++-- test/HtmlToOpenXml.Tests/AbbrTests.cs | 16 +++++ test/HtmlToOpenXml.Tests/ImgTests.cs | 64 +++++++++++++---- test/HtmlToOpenXml.Tests/LinkTests.cs | 83 +++++++++++++++++----- test/HtmlToOpenXml.Tests/NumberingTests.cs | 8 +-- 6 files changed, 151 insertions(+), 44 deletions(-) diff --git a/examples/Demo/Program.cs b/examples/Demo/Program.cs index c3c54245..e8b75393 100644 --- a/examples/Demo/Program.cs +++ b/examples/Demo/Program.cs @@ -15,7 +15,7 @@ static class Program static async Task Main(string[] args) { const string filename = "test.docx"; - string html = ResourceHelper.GetString("Resources.CompleteRunTest.html"); + string html = 
ResourceHelper.GetString("Resources.Document.html"); if (File.Exists(filename)) File.Delete(filename); using (MemoryStream generatedDocument = new MemoryStream()) @@ -39,10 +39,15 @@ static async Task Main(string[] args) } HtmlConverter converter = new HtmlConverter(mainPart); + // HeaderPart headerPart = mainPart.AddNewPart(); + //FooterPart footerPart = mainPart.AddNewPart(); converter.RenderPreAsTable = true; Body body = mainPart.Document.Body; - await converter.ParseHtml(html); + await converter.ParseHeader(@" + Red Dot"); + + await converter.ParseBody(html); mainPart.Document.Save(); AssertThatOpenXmlDocumentIsValid(package); diff --git a/src/Html2OpenXml/HtmlConverter.cs b/src/Html2OpenXml/HtmlConverter.cs index 356e8b72..7198dc14 100755 --- a/src/Html2OpenXml/HtmlConverter.cs +++ b/src/Html2OpenXml/HtmlConverter.cs @@ -70,19 +70,20 @@ public IList Parse(string html) } /// - /// Start the asynchroneous parse processing. + /// Start the asynchroneous parse processing where the output is intented to be inserted in . /// /// The HTML content to parse /// The cancellation token. /// Returns a list of parsed paragraph. [Obsolete("Use ParseAsync instead to respect naming convention")] + [System.Diagnostics.CodeAnalysis.ExcludeFromCodeCoverage] public Task> Parse(string html, CancellationToken cancellationToken = default) { return ParseAsync(html, cancellationToken); } /// - /// Start the asynchroneous parse processing. + /// Start the asynchroneous parse processing where the output is intented to be inserted in . /// /// The HTML content to parse /// The cancellation token. @@ -93,7 +94,7 @@ public Task> ParseAsync(string html, Cancel } /// - /// Start the asynchroneous parse processing. + /// Start the asynchroneous parse processing where the output is intented to be inserted in . /// /// The HTML content to parse /// The configuration of parallelism while downloading the remote resources. 
@@ -113,7 +114,7 @@ public Task> ParseAsync(string html, Parall /// public async Task ParseHeader(string html, CancellationToken cancellationToken = default) { - if (mainPart.HeaderParts is null) + if (mainPart.HeaderParts is null || !mainPart.HeaderParts.Any()) mainPart.AddNewPart(); var headerPart = mainPart.HeaderParts!.First(); headerPart.Header ??= new(); @@ -134,7 +135,7 @@ public async Task ParseHeader(string html, CancellationToken cancellationToken = /// public async Task ParseFooter(string html, CancellationToken cancellationToken = default) { - if (mainPart.FooterParts is null) + if (mainPart.FooterParts is null || !mainPart.FooterParts.Any()) mainPart.AddNewPart(); var footerPart = mainPart.FooterParts!.First(); footerPart.Footer ??= new(); @@ -191,6 +192,7 @@ public async Task ParseBody(string html, CancellationToken cancellationToken = d /// The configuration of parallelism while downloading the remote resources. /// Returns a list of parsed paragraph. [Obsolete("Use ParseAsync instead to respect naming convention")] + [System.Diagnostics.CodeAnalysis.ExcludeFromCodeCoverage] public Task> Parse(string html, ParallelOptions parallelOptions) { bodyImageLoader ??= new ImagePrefetcher(mainPart, webRequester); @@ -203,7 +205,8 @@ public Task> Parse(string html, ParallelOpt /// /// The HTML content to parse /// The cancellation token. 
- [Obsolete("Use ParseBody instead for clarification")] + [Obsolete("Use ParseBody instead for output clarification")] + [System.Diagnostics.CodeAnalysis.ExcludeFromCodeCoverage] public Task ParseHtml(string html, CancellationToken cancellationToken = default) { return ParseBody(html, cancellationToken); diff --git a/test/HtmlToOpenXml.Tests/AbbrTests.cs b/test/HtmlToOpenXml.Tests/AbbrTests.cs index f3063ba0..b77cc91d 100644 --- a/test/HtmlToOpenXml.Tests/AbbrTests.cs +++ b/test/HtmlToOpenXml.Tests/AbbrTests.cs @@ -177,5 +177,21 @@ public void InsideParagraph_ReturnsMultipleRuns() Assert.That(elements[0].Elements().Any(r => r.HasChild()), Is.True); }); } + + [TestCase("NASA")] + [TestCase("
NASA
")] + public async Task ParseIntoHeader_ShouldBeIgnored(string html) + { + await converter.ParseHeader(html); + Assert.That(mainPart.HeaderParts.First().Header.ChildElements, Is.Empty); + } + + [TestCase("NASA")] + [TestCase("
NASA
")] + public async Task ParseIntoFooter_ShouldBeIgnored(string html) + { + await converter.ParseFooter(html); + Assert.That(mainPart.FooterParts.First().Footer.ChildElements, Is.Empty); + } } } \ No newline at end of file diff --git a/test/HtmlToOpenXml.Tests/ImgTests.cs b/test/HtmlToOpenXml.Tests/ImgTests.cs index 5ca83b73..8bdacf9d 100644 --- a/test/HtmlToOpenXml.Tests/ImgTests.cs +++ b/test/HtmlToOpenXml.Tests/ImgTests.cs @@ -20,7 +20,7 @@ public void AbsoluteUri_ReturnsDrawing_WithDownloadedData() { var elements = converter.Parse(@"Smiley face"); Assert.That(elements, Has.Count.EqualTo(1)); - AssertIsImg(elements[0]); + AssertIsImg(mainPart, elements[0]); } [Test] @@ -28,14 +28,14 @@ public void DataUri_ReturnsDrawing_WithDecryptedData() { var elements = converter.Parse(@"Smiley face"); Assert.That(elements, Has.Count.EqualTo(1)); - AssertIsImg(elements[0]); + AssertIsImg(mainPart, elements[0]); } [Test] public void WithBorder_ReturnsRunWithBorder() { var elements = converter.Parse(@""); - AssertIsImg(elements[0]); + AssertIsImg(mainPart, elements[0]); var run = elements[0].GetFirstChild(); var runProperties = run?.GetFirstChild(); Assert.That(runProperties, Is.Not.Null); @@ -55,7 +55,7 @@ public void ManualProvisioning_ReturnsDrawing_WithProvidedData() var elements = converter.Parse(@"Smiley face"); Assert.That(elements, Has.Count.EqualTo(1)); - AssertIsImg(elements[0]); + AssertIsImg(mainPart, elements[0]); } [TestCase("Smiley face", Description = "Empty image")] @@ -89,9 +89,9 @@ public async Task FileSystem_LocalImage_WithSpaceInName_ShouldSucceed() await resourceStream.CopyToAsync(fileStream); var localUri = "file:///" + filepath.TrimStart('/').Replace(" ", "%20"); - var elements = await converter.Parse($"", CancellationToken.None); + var elements = await converter.ParseAsync($""); Assert.That(elements.Count(), Is.EqualTo(1)); - AssertIsImg(elements.First()); + AssertIsImg(mainPart, elements.First()); } [Test(Description = "Reading local file containing a 
space in the name")] @@ -100,9 +100,9 @@ public async Task RemoteImage_WithBaseUri_ShouldSucceed() converter = new HtmlConverter(mainPart, new IO.DefaultWebRequest() { BaseImageUrl = new Uri("http://github.com/onizet/html2openxml") }); - var elements = await converter.Parse($"", CancellationToken.None); + var elements = await converter.ParseAsync($""); Assert.That(elements, Is.Not.Empty); - AssertIsImg(elements.First()); + AssertIsImg(mainPart, elements.First()); } [Test(Description = "Image ID must be unique, amongst header, body and footer parts")] @@ -124,7 +124,7 @@ public async Task ImageIds_IsUniqueAcrossPackagingParts() Assert.That(beforeMaxDocPropId, Is.Not.Null); HtmlConverter converter = new(mainPart); - await converter.ParseHtml(""); + await converter.ParseBody(""); mainPart.Document.Save(); var img = mainPart.Document.Body!.Descendants().FirstOrDefault(); @@ -149,12 +149,50 @@ public void WithIncompleteHeader_ShouldNotThrow() where T : OpenXmlPart, IFix HtmlConverter converter = new(mainPart); Assert.DoesNotThrowAsync(async () => - await converter.ParseHtml("")); + await converter.ParseBody("")); } - private Drawing AssertIsImg (OpenXmlCompositeElement element) + [TestCase(typeof(HeaderPart))] + [TestCase(typeof(FooterPart))] + [TestCase(typeof(MainDocumentPart))] + public async Task ParseIntoDocumentPart_ReturnsImageParentedToPart (Type openXmlPartType) { - var run = element.GetFirstChild(); + string html = @"Smiley face"; + OpenXmlElement host; + OpenXmlPartContainer container; + + if (openXmlPartType == typeof(HeaderPart)) + { + await converter.ParseHeader(html); + container = mainPart.HeaderParts.First(); + host = mainPart.HeaderParts.First().Header; + } + else if (openXmlPartType == typeof(FooterPart)) + { + await converter.ParseFooter(html); + container = mainPart.FooterParts.First(); + host = mainPart.FooterParts.First().Footer; + } + else if (openXmlPartType == typeof(MainDocumentPart)) + { + await converter.ParseBody(html); + container = 
mainPart; + host = mainPart.Document.Body!; + } + else + { + throw new NotSupportedException($"Test case not supported for {openXmlPartType.FullName}"); + } + + Assert.That(host.ChildElements, Has.Count.EqualTo(1)); + var p = host.ChildElements.FirstOrDefault(c => c is Paragraph); + Assert.That(p, Is.Not.Null); + AssertIsImg(container, p); + } + + private static Drawing AssertIsImg (OpenXmlPartContainer container, OpenXmlElement paragraph) + { + var run = paragraph.GetFirstChild(); Assert.That(run, Is.Not.Null); var img = run.GetFirstChild(); Assert.That(img, Is.Not.Null); @@ -164,7 +202,7 @@ private Drawing AssertIsImg (OpenXmlCompositeElement element) var imagePartId = pic.BlipFill.Blip.Embed.Value; Assert.That(imagePartId, Is.Not.Null); - var part = mainPart.GetPartById(imagePartId); + var part = container.GetPartById(imagePartId); Assert.That(part, Is.TypeOf(typeof(ImagePart))); return img; } diff --git a/test/HtmlToOpenXml.Tests/LinkTests.cs b/test/HtmlToOpenXml.Tests/LinkTests.cs index 0332bb5d..e5daec96 100644 --- a/test/HtmlToOpenXml.Tests/LinkTests.cs +++ b/test/HtmlToOpenXml.Tests/LinkTests.cs @@ -1,5 +1,7 @@ using NUnit.Framework; using DocumentFormat.OpenXml.Wordprocessing; +using DocumentFormat.OpenXml.Packaging; +using DocumentFormat.OpenXml; namespace HtmlToOpenXml.Tests { @@ -15,25 +17,7 @@ public class LinkTests : HtmlConverterTestBase public void ExternalLink_ShouldSucceed (string link) { var elements = converter.Parse($@"Test Caption"); - Assert.That(elements, Has.Count.EqualTo(1)); - Assert.Multiple(() => { - Assert.That(elements[0], Is.TypeOf(typeof(Paragraph))); - Assert.That(elements[0].HasChild(), Is.True); - }); - var hyperlink = elements[0].GetFirstChild()!; - Assert.That(hyperlink.Tooltip, Is.Not.Null); - Assert.That(hyperlink.Tooltip.Value, Is.EqualTo("Test Tooltip")); - Assert.That(hyperlink.HasChild(), Is.True); - Assert.That(elements[0].InnerText, Is.EqualTo("Test Caption")); - - Assert.That(hyperlink.Id, Is.Not.Null); - 
Assert.That(hyperlink.History?.Value, Is.EqualTo(true)); - Assert.That(mainPart.HyperlinkRelationships.Count(), Is.GreaterThan(0)); - - var extLink = mainPart.HyperlinkRelationships.FirstOrDefault(r => r.Id == hyperlink.Id); - Assert.That(extLink, Is.Not.Null); - Assert.That(extLink.IsExternal, Is.EqualTo(true)); - Assert.That(extLink.Uri.AbsoluteUri, Is.EqualTo("http://www.site.com/")); + AssertHyperlink(mainPart, elements); } [TestCase(@"Js")] @@ -161,5 +145,66 @@ public void WithMultipleRun_ReturnsHyperlinkWithMultipleRuns() Assert.That(h.ChildElements, Has.All.TypeOf(typeof(Run))); Assert.That(h.InnerText, Is.EqualTo("Html to OpenXml !")); } + + [TestCase(typeof(HeaderPart))] + [TestCase(typeof(FooterPart))] + [TestCase(typeof(MainDocumentPart))] + public async Task ParseIntoDocumentPart_ReturnsHyperlinkParentedToPart (Type openXmlPartType) + { + string html = @"Test Caption"; + OpenXmlElement host; + OpenXmlPartContainer container; + + if (openXmlPartType == typeof(HeaderPart)) + { + await converter.ParseHeader(html); + container = mainPart.HeaderParts.First(); + host = mainPart.HeaderParts.First().Header; + } + else if (openXmlPartType == typeof(FooterPart)) + { + await converter.ParseFooter(html); + container = mainPart.FooterParts.First(); + host = mainPart.FooterParts.First().Footer; + } + else if (openXmlPartType == typeof(MainDocumentPart)) + { + await converter.ParseBody(html); + container = mainPart; + host = mainPart.Document.Body!; + } + else + { + throw new NotSupportedException($"Test case not supported for {openXmlPartType.FullName}"); + } + + AssertHyperlink(container, host.ChildElements); + } + + private static void AssertHyperlink(OpenXmlPartContainer container, IEnumerable elements) + { + Assert.That(elements.Count(), Is.EqualTo(1)); + Assert.Multiple(() => { + Assert.That(elements.First(), Is.TypeOf(typeof(Paragraph))); + Assert.That(elements.First().HasChild(), Is.True); + }); + var hyperlink = elements.First().GetFirstChild()!; + 
Assert.That(hyperlink.Tooltip, Is.Not.Null); + Assert.That(hyperlink.Tooltip.Value, Is.EqualTo("Test Tooltip")); + Assert.That(hyperlink.HasChild(), Is.True); + Assert.That(elements.First().InnerText, Is.EqualTo("Test Caption")); + + Assert.Multiple(() => + { + Assert.That(hyperlink.Id, Is.Not.Null); + Assert.That(hyperlink.History?.Value, Is.EqualTo(true)); + Assert.That(container.HyperlinkRelationships.Count(), Is.GreaterThan(0)); + }); + + var extLink = container.HyperlinkRelationships.FirstOrDefault(r => r.Id == hyperlink.Id); + Assert.That(extLink, Is.Not.Null); + Assert.That(extLink.IsExternal, Is.EqualTo(true)); + Assert.That(extLink.Uri.AbsoluteUri, Is.EqualTo("http://www.site.com/")); + } } } \ No newline at end of file diff --git a/test/HtmlToOpenXml.Tests/NumberingTests.cs b/test/HtmlToOpenXml.Tests/NumberingTests.cs index 6f4dc8c0..8fa06443 100644 --- a/test/HtmlToOpenXml.Tests/NumberingTests.cs +++ b/test/HtmlToOpenXml.Tests/NumberingTests.cs @@ -267,9 +267,9 @@ public void UseVariantStyle_ListItem_ReturnsAppliedStyle() [Test(Description = "Resume indenting from existing numbering (default behaviour)")] public async Task ContinueNumbering_ReturnsSecondList_ContinueOrder() { - await converter.ParseHtml(@"
  1. Item 1
"); + await converter.ParseBody(@"
  1. Item 1
"); - await converter.ParseHtml("
  1. Item 2
"); + await converter.ParseBody("
  1. Item 2
"); var absNum = mainPart.NumberingDefinitionsPart?.Numbering .Elements() @@ -296,10 +296,10 @@ public async Task ContinueNumbering_ReturnsSecondList_ContinueOrder() [Test(Description = "Stop indenting from existing numbering (issue #57)")] public async Task DisableContinueNumbering_ReturnsSecondList_RestartingOrder() { - await converter.ParseHtml(@"
  1. Item 1
"); + await converter.ParseBody(@"
  1. Item 1
"); converter.ContinueNumbering = false; - await converter.ParseHtml("
  1. Item 2
"); + await converter.ParseBody("
  1. Item 2
"); var absNum = mainPart.NumberingDefinitionsPart?.Numbering .Elements() From 978dfafd3760d3bda38a0c78627271e1b6fa413e Mon Sep 17 00:00:00 2001 From: Olivier Nizet Date: Mon, 16 Sep 2024 22:40:15 +0200 Subject: [PATCH 3/8] Clean code --- src/Html2OpenXml/Expressions/BodyExpression.cs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Html2OpenXml/Expressions/BodyExpression.cs b/src/Html2OpenXml/Expressions/BodyExpression.cs index b0a9addf..3040b47d 100644 --- a/src/Html2OpenXml/Expressions/BodyExpression.cs +++ b/src/Html2OpenXml/Expressions/BodyExpression.cs @@ -36,8 +36,7 @@ protected override void ComposeStyles(ParsingContext context) { base.ComposeStyles(context); - var mainPart = context.MainPart as MainDocumentPart; - if (mainPart is null) return; + var mainPart = context.MainPart; // Unsupported W3C attribute but claimed by users. Specified at level, the page // orientation is applied on the whole document From aa5b5dc8f3233b802aa6f7bbad9f6de68ef0f147 Mon Sep 17 00:00:00 2001 From: Olivier Nizet Date: Thu, 19 Sep 2024 23:15:13 +0200 Subject: [PATCH 4/8] Register the header/footer relationship --- src/Html2OpenXml/HtmlConverter.cs | 46 ++++++++++++++++++++++++++++--- 1 file changed, 42 insertions(+), 4 deletions(-) diff --git a/src/Html2OpenXml/HtmlConverter.cs b/src/Html2OpenXml/HtmlConverter.cs index 7198dc14..0b7e2551 100755 --- a/src/Html2OpenXml/HtmlConverter.cs +++ b/src/Html2OpenXml/HtmlConverter.cs @@ -114,9 +114,18 @@ public Task> ParseAsync(string html, Parall /// public async Task ParseHeader(string html, CancellationToken cancellationToken = default) { + string? 
partId = null; + HeaderPart headerPart; if (mainPart.HeaderParts is null || !mainPart.HeaderParts.Any()) - mainPart.AddNewPart(); - var headerPart = mainPart.HeaderParts!.First(); + { + headerPart = mainPart.AddNewPart(); + partId = mainPart.GetIdOfPart(headerPart); + } + else + { + headerPart = mainPart.HeaderParts.First(); + } + headerPart.Header ??= new(); headerImageLoader ??= new ImagePrefetcher(headerPart, webRequester); @@ -125,6 +134,16 @@ public async Task ParseHeader(string html, CancellationToken cancellationToken = foreach (var p in paragraphs) headerPart.Header.AddChild(p); + + if (partId != null) + { + var sectionProps = mainPart.Document.Body!.Elements(); + foreach (var sectPr in sectionProps) + { + sectPr.RemoveAllChildren(); + sectPr.PrependChild(new HeaderReference() { Id = partId, Type = HeaderFooterValues.Default }); + } + } } /// @@ -135,9 +154,18 @@ public async Task ParseHeader(string html, CancellationToken cancellationToken = /// public async Task ParseFooter(string html, CancellationToken cancellationToken = default) { + string? 
partId = null; + FooterPart footerPart; if (mainPart.FooterParts is null || !mainPart.FooterParts.Any()) - mainPart.AddNewPart(); - var footerPart = mainPart.FooterParts!.First(); + { + footerPart = mainPart.AddNewPart(); + partId = mainPart.GetIdOfPart(footerPart); + } + else + { + footerPart = mainPart.FooterParts.First(); + } + footerPart.Footer ??= new(); footerImageLoader ??= new ImagePrefetcher(footerPart, webRequester); @@ -146,6 +174,16 @@ public async Task ParseFooter(string html, CancellationToken cancellationToken = foreach (var p in paragraphs) footerPart.Footer.AddChild(p); + + if (partId != null) + { + var sectionProps = mainPart.Document.Body!.Elements(); + foreach (var sectPr in sectionProps) + { + sectPr.RemoveAllChildren(); + sectPr.PrependChild(new FooterReference() { Id = partId, Type = HeaderFooterValues.Default }); + } + } } /// From 2e74a8739b814b6b6eda5f5e02b1846a17272b9c Mon Sep 17 00:00:00 2001 From: Olivier Nizet Date: Fri, 20 Sep 2024 21:13:43 +0200 Subject: [PATCH 5/8] Unit test code to assert new header-footer are correctly registered --- src/Html2OpenXml/HtmlConverter.cs | 12 ++++++ test/HtmlToOpenXml.Tests/HeaderFooterTests.cs | 42 +++++++++++++++++++ .../HtmlConverterTestBase.cs | 17 ++++++++ test/HtmlToOpenXml.Tests/ImgTests.cs | 1 + test/HtmlToOpenXml.Tests/LinkTests.cs | 1 + test/HtmlToOpenXml.Tests/NumberingTests.cs | 2 + 6 files changed, 75 insertions(+) create mode 100644 test/HtmlToOpenXml.Tests/HeaderFooterTests.cs diff --git a/src/Html2OpenXml/HtmlConverter.cs b/src/Html2OpenXml/HtmlConverter.cs index 0b7e2551..9d5c538a 100755 --- a/src/Html2OpenXml/HtmlConverter.cs +++ b/src/Html2OpenXml/HtmlConverter.cs @@ -138,6 +138,12 @@ public async Task ParseHeader(string html, CancellationToken cancellationToken = if (partId != null) { var sectionProps = mainPart.Document.Body!.Elements(); + if (!sectionProps.Any()) + { + sectionProps = [new SectionProperties()]; + mainPart.Document.Body!.AddChild(sectionProps.First()); + } + foreach 
(var sectPr in sectionProps) { sectPr.RemoveAllChildren(); @@ -178,6 +184,12 @@ public async Task ParseFooter(string html, CancellationToken cancellationToken = if (partId != null) { var sectionProps = mainPart.Document.Body!.Elements(); + if (!sectionProps.Any()) + { + sectionProps = [new SectionProperties()]; + mainPart.Document.Body!.AddChild(sectionProps.First()); + } + foreach (var sectPr in sectionProps) { sectPr.RemoveAllChildren(); diff --git a/test/HtmlToOpenXml.Tests/HeaderFooterTests.cs b/test/HtmlToOpenXml.Tests/HeaderFooterTests.cs new file mode 100644 index 00000000..55dd2bfc --- /dev/null +++ b/test/HtmlToOpenXml.Tests/HeaderFooterTests.cs @@ -0,0 +1,42 @@ +using NUnit.Framework; +using DocumentFormat.OpenXml.Wordprocessing; + +namespace HtmlToOpenXml.Tests +{ + /// + /// Tests on ParseHeader and ParseFooter methods. + /// + [TestFixture] + public class HeaderFooterTests : HtmlConverterTestBase + { + [Test] + public async Task Header_ReturnsHeaderPartLinkedToBody() + { + await converter.ParseHeader("

Header content

"); + + var headerPart = mainPart.HeaderParts?.FirstOrDefault(); + Assert.That(headerPart, Is.Not.Null); + Assert.That(headerPart.Header, Is.Not.Null); + + var sectionProperties = mainPart.Document.Body!.Elements(); + Assert.That(sectionProperties, Is.Not.Empty); + Assert.That(sectionProperties.Any(s => s.HasChild()), Is.True); + AssertThatOpenXmlDocumentIsValid(); + } + + [Test] + public async Task Footer_ReturnsFooterPartLinkedToBody() + { + await converter.ParseFooter("

Footer content

"); + + var footerPart = mainPart.FooterParts?.FirstOrDefault(); + Assert.That(footerPart, Is.Not.Null); + Assert.That(footerPart.Footer, Is.Not.Null); + + var sectionProperties = mainPart.Document.Body!.Elements(); + Assert.That(sectionProperties, Is.Not.Empty); + Assert.That(sectionProperties.Any(s => s.HasChild()), Is.True); + AssertThatOpenXmlDocumentIsValid(); + } + } +} \ No newline at end of file diff --git a/test/HtmlToOpenXml.Tests/HtmlConverterTestBase.cs b/test/HtmlToOpenXml.Tests/HtmlConverterTestBase.cs index ca50f928..08d5a77d 100644 --- a/test/HtmlToOpenXml.Tests/HtmlConverterTestBase.cs +++ b/test/HtmlToOpenXml.Tests/HtmlConverterTestBase.cs @@ -1,5 +1,6 @@ using DocumentFormat.OpenXml; using DocumentFormat.OpenXml.Packaging; +using DocumentFormat.OpenXml.Validation; using DocumentFormat.OpenXml.Wordprocessing; using NUnit.Framework; @@ -36,5 +37,21 @@ public void Close () package?.Dispose(); generatedDocument?.Dispose(); } + + protected void AssertThatOpenXmlDocumentIsValid() + { + var validator = new OpenXmlValidator(FileFormatVersions.Office2021); + var errors = validator.Validate(package); + + if (!errors.GetEnumerator().MoveNext()) + return; + + foreach (ValidationErrorInfo error in errors) + { + TestContext.Error.Write("{0}\n\t{1}\n", error.Path?.XPath, error.Description); + } + + Assert.Fail("The document doesn't look 100% compatible with Office 2021"); + } } } \ No newline at end of file diff --git a/test/HtmlToOpenXml.Tests/ImgTests.cs b/test/HtmlToOpenXml.Tests/ImgTests.cs index 8bdacf9d..d3a62d7b 100644 --- a/test/HtmlToOpenXml.Tests/ImgTests.cs +++ b/test/HtmlToOpenXml.Tests/ImgTests.cs @@ -188,6 +188,7 @@ public async Task ParseIntoDocumentPart_ReturnsImageParentedToPart (Type openXml var p = host.ChildElements.FirstOrDefault(c => c is Paragraph); Assert.That(p, Is.Not.Null); AssertIsImg(container, p); + AssertThatOpenXmlDocumentIsValid(); } private static Drawing AssertIsImg (OpenXmlPartContainer container, OpenXmlElement paragraph) 
diff --git a/test/HtmlToOpenXml.Tests/LinkTests.cs b/test/HtmlToOpenXml.Tests/LinkTests.cs index e5daec96..2a790416 100644 --- a/test/HtmlToOpenXml.Tests/LinkTests.cs +++ b/test/HtmlToOpenXml.Tests/LinkTests.cs @@ -179,6 +179,7 @@ public async Task ParseIntoDocumentPart_ReturnsHyperlinkParentedToPart (Type ope } AssertHyperlink(container, host.ChildElements); + AssertThatOpenXmlDocumentIsValid(); } private static void AssertHyperlink(OpenXmlPartContainer container, IEnumerable elements) diff --git a/test/HtmlToOpenXml.Tests/NumberingTests.cs b/test/HtmlToOpenXml.Tests/NumberingTests.cs index 8fa06443..5288765e 100644 --- a/test/HtmlToOpenXml.Tests/NumberingTests.cs +++ b/test/HtmlToOpenXml.Tests/NumberingTests.cs @@ -291,6 +291,7 @@ public async Task ContinueNumbering_ReturnsSecondList_ContinueOrder() e.ParagraphProperties?.NumberingProperties?.NumberingId?.Val?.Value), Has.All.EqualTo(instances.First().NumberID!.Value), "All paragraphs are linked to the same list instance"); + AssertThatOpenXmlDocumentIsValid(); } [Test(Description = "Stop indenting from existing numbering (issue #57)")] @@ -321,6 +322,7 @@ public async Task DisableContinueNumbering_ReturnsSecondList_RestartingOrder() e.ParagraphProperties?.NumberingProperties?.NumberingId?.Val?.Value), Is.Unique, "All paragraphs use different list instances"); + AssertThatOpenXmlDocumentIsValid(); } /// From e6907bcedb948f830b2ecc0c441d02ae8b044719 Mon Sep 17 00:00:00 2001 From: Olivier Nizet Date: Fri, 20 Sep 2024 21:14:02 +0200 Subject: [PATCH 6/8] Acronym and blockquote now default to simple run --- .../Expressions/AbbreviationExpression.cs | 7 ++--- .../Expressions/BlockQuoteExpression.cs | 12 ++++---- test/HtmlToOpenXml.Tests/AbbrTests.cs | 28 +++++++++++++++++-- 3 files changed, 33 insertions(+), 14 deletions(-) diff --git a/src/Html2OpenXml/Expressions/AbbreviationExpression.cs b/src/Html2OpenXml/Expressions/AbbreviationExpression.cs index e8345937..d0dcca86 100644 --- 
a/src/Html2OpenXml/Expressions/AbbreviationExpression.cs +++ b/src/Html2OpenXml/Expressions/AbbreviationExpression.cs @@ -29,15 +29,12 @@ sealed class AbbreviationExpression(IHtmlElement node) : PhrasingElementExpressi /// public override IEnumerable Interpret(ParsingContext context) { - // Footnote or endnote are invalid inside header and footer - if (context.HostingPart is not MainDocumentPart) - return []; - var childElements = base.Interpret(context); // Transform the inline acronym/abbreviation to a reference to a foot note. + // Footnote or endnote are invalid inside header and footer string? description = node.Title; - if (string.IsNullOrEmpty(description)) + if (string.IsNullOrEmpty(description) || context.HostingPart is not MainDocumentPart) return childElements; string runStyle; diff --git a/src/Html2OpenXml/Expressions/BlockQuoteExpression.cs b/src/Html2OpenXml/Expressions/BlockQuoteExpression.cs index d2b0e57a..3fa3d431 100644 --- a/src/Html2OpenXml/Expressions/BlockQuoteExpression.cs +++ b/src/Html2OpenXml/Expressions/BlockQuoteExpression.cs @@ -27,19 +27,19 @@ sealed class BlockQuoteExpression(IHtmlElement node) : BlockElementExpression(no /// public override IEnumerable Interpret(ParsingContext context) { - // Footnote or endnote are invalid inside header and footer - if (context.HostingPart is not MainDocumentPart) - return []; - - string? description = node.GetAttribute("cite"); - var childElements = base.Interpret(context); if (!childElements.Any()) return []; + + // Footnote or endnote are invalid inside header and footer + if (context.HostingPart is not MainDocumentPart) + return childElements; // Transform the inline acronym/abbreviation to a reference to a foot note. if (childElements.First() is Paragraph paragraph) { + string? 
description = node.GetAttribute("cite"); + paragraph.ParagraphProperties ??= new(); if (paragraph.ParagraphProperties.ParagraphStyleId is null) paragraph.ParagraphProperties.ParagraphStyleId = diff --git a/test/HtmlToOpenXml.Tests/AbbrTests.cs b/test/HtmlToOpenXml.Tests/AbbrTests.cs index b77cc91d..deacf004 100644 --- a/test/HtmlToOpenXml.Tests/AbbrTests.cs +++ b/test/HtmlToOpenXml.Tests/AbbrTests.cs @@ -110,6 +110,8 @@ public void WithNoDescription_ReturnsSimpleParagraph(string html) var elements = converter.Parse(html); Assert.That(elements, Has.Count.EqualTo(1)); Assert.That(elements, Is.All.TypeOf()); + Assert.That(mainPart.FootnotesPart, Is.Null); + Assert.That(mainPart.EndnotesPart, Is.Null); } [TestCase("HTML", AcronymPosition.DocumentEnd, Description = "Read existing endnotes references")] @@ -180,10 +182,20 @@ public void InsideParagraph_ReturnsMultipleRuns() [TestCase("NASA")] [TestCase("
NASA
")] - public async Task ParseIntoHeader_ShouldBeIgnored(string html) + public async Task ParseIntoHeader_ReturnsSimpleParagraph(string html) { await converter.ParseHeader(html); - Assert.That(mainPart.HeaderParts.First().Header.ChildElements, Is.Empty); + var header = mainPart.HeaderParts?.FirstOrDefault()?.Header; + Assert.That(header, Is.Not.Null); + Assert.That(header.ChildElements, Has.Count.EqualTo(1)); + Assert.Multiple(() => + { + Assert.That(header.ChildElements, Is.All.TypeOf()); + Assert.That(header.FirstChild!.InnerText, Is.EqualTo("NASA")); + Assert.That(mainPart.FootnotesPart, Is.Null); + Assert.That(mainPart.EndnotesPart, Is.Null); + AssertThatOpenXmlDocumentIsValid(); + }); } [TestCase("NASA")] @@ -191,7 +203,17 @@ public async Task ParseIntoHeader_ShouldBeIgnored(string html) public async Task ParseIntoFooter_ShouldBeIgnored(string html) { await converter.ParseFooter(html); - Assert.That(mainPart.FooterParts.First().Footer.ChildElements, Is.Empty); + var footer = mainPart.FooterParts?.FirstOrDefault()?.Footer; + Assert.That(footer, Is.Not.Null); + Assert.That(footer.ChildElements, Has.Count.EqualTo(1)); + Assert.Multiple(() => + { + Assert.That(footer.ChildElements, Is.All.TypeOf()); + Assert.That(footer.FirstChild!.InnerText, Is.EqualTo("NASA")); + Assert.That(mainPart.FootnotesPart, Is.Null); + Assert.That(mainPart.EndnotesPart, Is.Null); + AssertThatOpenXmlDocumentIsValid(); + }); } } } \ No newline at end of file From 5ad14cb8006b13abb540dc65cf709828923168f0 Mon Sep 17 00:00:00 2001 From: Olivier Nizet Date: Fri, 20 Sep 2024 22:53:36 +0200 Subject: [PATCH 7/8] Expose on which page(s) the header/footer will be displayed --- src/Html2OpenXml/HtmlConverter.cs | 108 +++++++++--------- test/HtmlToOpenXml.Tests/HeaderFooterTests.cs | 50 +++++++- .../HtmlConverterTestBase.cs | 2 +- 3 files changed, 101 insertions(+), 59 deletions(-) diff --git a/src/Html2OpenXml/HtmlConverter.cs b/src/Html2OpenXml/HtmlConverter.cs index 9d5c538a..3c6e53e3 100755 --- 
a/src/Html2OpenXml/HtmlConverter.cs +++ b/src/Html2OpenXml/HtmlConverter.cs @@ -110,21 +110,15 @@ public Task> ParseAsync(string html, Parall /// Parse asynchroneously the Html and append the output into the Header of the document. ///
/// The HTML content to parse + /// Determines the page(s) on which the current header shall be displayed. + /// If omitted, the value is used. /// The cancellation token. /// - public async Task ParseHeader(string html, CancellationToken cancellationToken = default) + public async Task ParseHeader(string html, HeaderFooterValues? headerType = null, + CancellationToken cancellationToken = default) { - string? partId = null; - HeaderPart headerPart; - if (mainPart.HeaderParts is null || !mainPart.HeaderParts.Any()) - { - headerPart = mainPart.AddNewPart(); - partId = mainPart.GetIdOfPart(headerPart); - } - else - { - headerPart = mainPart.HeaderParts.First(); - } + headerType ??= HeaderFooterValues.Default; + var headerPart = ResolveHeaderFooterPart(headerType); headerPart.Header ??= new(); headerImageLoader ??= new ImagePrefetcher(headerPart, webRequester); @@ -134,43 +128,21 @@ public async Task ParseHeader(string html, CancellationToken cancellationToken = foreach (var p in paragraphs) headerPart.Header.AddChild(p); - - if (partId != null) - { - var sectionProps = mainPart.Document.Body!.Elements(); - if (!sectionProps.Any()) - { - sectionProps = [new SectionProperties()]; - mainPart.Document.Body!.AddChild(sectionProps.First()); - } - - foreach (var sectPr in sectionProps) - { - sectPr.RemoveAllChildren(); - sectPr.PrependChild(new HeaderReference() { Id = partId, Type = HeaderFooterValues.Default }); - } - } } /// /// Parse asynchroneously the Html and append the output into the Footer of the document. /// /// The HTML content to parse + /// Determines the page(s) on which the current footer shall be displayed. + /// If omitted, the value is used. /// The cancellation token. /// - public async Task ParseFooter(string html, CancellationToken cancellationToken = default) + public async Task ParseFooter(string html, HeaderFooterValues? footerType = null, + CancellationToken cancellationToken = default) { - string? 
partId = null; - FooterPart footerPart; - if (mainPart.FooterParts is null || !mainPart.FooterParts.Any()) - { - footerPart = mainPart.AddNewPart(); - partId = mainPart.GetIdOfPart(footerPart); - } - else - { - footerPart = mainPart.FooterParts.First(); - } + footerType ??= HeaderFooterValues.Default; + var footerPart = ResolveHeaderFooterPart(footerType); footerPart.Footer ??= new(); footerImageLoader ??= new ImagePrefetcher(footerPart, webRequester); @@ -180,22 +152,6 @@ public async Task ParseFooter(string html, CancellationToken cancellationToken = foreach (var p in paragraphs) footerPart.Footer.AddChild(p); - - if (partId != null) - { - var sectionProps = mainPart.Document.Body!.Elements(); - if (!sectionProps.Any()) - { - sectionProps = [new SectionProperties()]; - mainPart.Document.Body!.AddChild(sectionProps.First()); - } - - foreach (var sectPr in sectionProps) - { - sectPr.RemoveAllChildren(); - sectPr.PrependChild(new FooterReference() { Id = partId, Type = HeaderFooterValues.Default }); - } - } } /// @@ -326,6 +282,46 @@ await imageUris.ForEachAsync( parallelOptions).ConfigureAwait(false); } + /// + /// Create or resolve the header/footer related to the type. + /// + private TPart ResolveHeaderFooterPart(HeaderFooterValues? type) + where TPart: OpenXmlPart, IFixedContentTypePart + where TRefType: HeaderFooterReferenceType, new() + { + bool wasRefSet = false; + TPart? 
part = null; + + var sectionProps = mainPart.Document.Body!.Elements(); + if (!sectionProps.Any()) + { + sectionProps = [new SectionProperties()]; + mainPart.Document.Body!.AddChild(sectionProps.First()); + } + else + { + var reference = sectionProps.SelectMany(sectPr => sectPr.Elements()) + .Where(r => r.Id?.HasValue == true) + .FirstOrDefault(r => r.Type?.Value == type); + + if (reference != null) + part = (TPart) mainPart.GetPartById(reference.Id!); + wasRefSet = part is not null; + } + + part ??= mainPart.AddNewPart(); + + if (!wasRefSet) + { + sectionProps.First().PrependChild(new TRefType() { + Id = mainPart.GetIdOfPart(part), + Type = type + }); + } + + return part; + } + //____________________________________________________________________ // // Configuration diff --git a/test/HtmlToOpenXml.Tests/HeaderFooterTests.cs b/test/HtmlToOpenXml.Tests/HeaderFooterTests.cs index 55dd2bfc..a5349951 100644 --- a/test/HtmlToOpenXml.Tests/HeaderFooterTests.cs +++ b/test/HtmlToOpenXml.Tests/HeaderFooterTests.cs @@ -1,5 +1,6 @@ using NUnit.Framework; using DocumentFormat.OpenXml.Wordprocessing; +using DocumentFormat.OpenXml.Packaging; namespace HtmlToOpenXml.Tests { @@ -12,7 +13,7 @@ public class HeaderFooterTests : HtmlConverterTestBase [Test] public async Task Header_ReturnsHeaderPartLinkedToBody() { - await converter.ParseHeader("

Header content

"); + await converter.ParseHeader("

Header content

", HeaderFooterValues.First); var headerPart = mainPart.HeaderParts?.FirstOrDefault(); Assert.That(headerPart, Is.Not.Null); @@ -20,7 +21,8 @@ public async Task Header_ReturnsHeaderPartLinkedToBody() var sectionProperties = mainPart.Document.Body!.Elements(); Assert.That(sectionProperties, Is.Not.Empty); - Assert.That(sectionProperties.Any(s => s.HasChild()), Is.True); + Assert.That(sectionProperties.SelectMany(s => s.Elements()) + .Any(r => r.Type?.Value == HeaderFooterValues.First), Is.True); AssertThatOpenXmlDocumentIsValid(); } @@ -38,5 +40,49 @@ public async Task Footer_ReturnsFooterPartLinkedToBody() Assert.That(sectionProperties.Any(s => s.HasChild()), Is.True); AssertThatOpenXmlDocumentIsValid(); } + + [Test(Description = "Overwrite existing Default header")] + public async Task WithExistingHeader_Default_ReturnsOverridenHeaderPart() + { + using var generatedDocument = new MemoryStream(); + using (var buffer = ResourceHelper.GetStream("Resources.DocWithImgHeaderFooter.docx")) + buffer.CopyTo(generatedDocument); + + generatedDocument.Position = 0L; + using WordprocessingDocument package = WordprocessingDocument.Open(generatedDocument, true); + MainDocumentPart mainPart = package.MainDocumentPart!; + HtmlConverter converter = new(mainPart); + + await converter.ParseHeader("Header content"); + + var sectionProperties = mainPart.Document.Body!.Elements(); + Assert.That(sectionProperties, Is.Not.Empty); + Assert.That(sectionProperties.SelectMany(s => s.Elements()) + .Count(r => r.Type?.Value == HeaderFooterValues.Default), Is.EqualTo(1)); + AssertThatOpenXmlDocumentIsValid(); + } + + [Test(Description = "Create additional header for even pages")] + public async Task WithExistingHeader_Even_ReturnsAnotherHeaderPart() + { + using var generatedDocument = new MemoryStream(); + using (var buffer = ResourceHelper.GetStream("Resources.DocWithImgHeaderFooter.docx")) + buffer.CopyTo(generatedDocument); + + generatedDocument.Position = 0L; + using WordprocessingDocument 
package = WordprocessingDocument.Open(generatedDocument, true); + MainDocumentPart mainPart = package.MainDocumentPart!; + HtmlConverter converter = new(mainPart); + + await converter.ParseHeader("Header even content", HeaderFooterValues.Even); + + var sectionProperties = mainPart.Document.Body!.Elements(); + Assert.That(sectionProperties, Is.Not.Empty); + Assert.That(sectionProperties.Count(s => s.HasChild()), Is.EqualTo(1)); + var headerRefs = sectionProperties.SelectMany(s => s.Elements()); + Assert.That(headerRefs.Count(r => r.Type?.Value == HeaderFooterValues.Default), Is.EqualTo(1)); + Assert.That(headerRefs.Count(r => r.Type?.Value == HeaderFooterValues.Even), Is.EqualTo(1)); + AssertThatOpenXmlDocumentIsValid(); + } } } \ No newline at end of file diff --git a/test/HtmlToOpenXml.Tests/HtmlConverterTestBase.cs b/test/HtmlToOpenXml.Tests/HtmlConverterTestBase.cs index 08d5a77d..3e056798 100644 --- a/test/HtmlToOpenXml.Tests/HtmlConverterTestBase.cs +++ b/test/HtmlToOpenXml.Tests/HtmlConverterTestBase.cs @@ -51,7 +51,7 @@ protected void AssertThatOpenXmlDocumentIsValid() TestContext.Error.Write("{0}\n\t{1}\n", error.Path?.XPath, error.Description); } - Assert.Fail("The document doesn't look 100% compatible with Office 2021"); + Assert.Fail("The document isn't conformant with Office 2021"); } } } \ No newline at end of file From 575ef1d42eb3e2b38296afd719231ff309d74a18 Mon Sep 17 00:00:00 2001 From: Olivier Nizet Date: Sat, 21 Sep 2024 21:13:59 +0200 Subject: [PATCH 8/8] Use default paragraph styles --- src/Html2OpenXml/HtmlConverter.cs | 12 ++++-- src/Html2OpenXml/PredefinedStyles.cs | 3 ++ src/Html2OpenXml/Primitives/DefaultStyles.cs | 14 ++++++- test/HtmlToOpenXml.Tests/HeaderFooterTests.cs | 39 +++++++++++++++---- 4 files changed, 57 insertions(+), 11 deletions(-) diff --git a/src/Html2OpenXml/HtmlConverter.cs b/src/Html2OpenXml/HtmlConverter.cs index 3c6e53e3..b15f3090 100755 --- a/src/Html2OpenXml/HtmlConverter.cs +++ b/src/Html2OpenXml/HtmlConverter.cs 
@@ -124,7 +124,8 @@ public async Task ParseHeader(string html, HeaderFooterValues? headerType = null headerImageLoader ??= new ImagePrefetcher(headerPart, webRequester); var paragraphs = await ParseCoreAsync(html, headerPart, headerImageLoader, - new ParallelOptions() { CancellationToken = cancellationToken }); + new ParallelOptions() { CancellationToken = cancellationToken }, + htmlStyles.GetParagraphStyle(htmlStyles.DefaultStyles.HeaderStyle)); foreach (var p in paragraphs) headerPart.Header.AddChild(p); @@ -148,7 +149,8 @@ public async Task ParseFooter(string html, HeaderFooterValues? footerType = null footerImageLoader ??= new ImagePrefetcher(footerPart, webRequester); var paragraphs = await ParseCoreAsync(html, footerPart, footerImageLoader, - new ParallelOptions() { CancellationToken = cancellationToken }); + new ParallelOptions() { CancellationToken = cancellationToken }, + htmlStyles.GetParagraphStyle(htmlStyles.DefaultStyles.FooterStyle)); foreach (var p in paragraphs) footerPart.Footer.AddChild(p); @@ -233,10 +235,12 @@ public void RefreshStyles() /// The OpenXml container where the content will be inserted into. /// The image resolver service linked to the . /// The configuration of parallelism while downloading the remote resources. + /// The default OpenXml style to apply on paragraphs. /// Returns a list of parsed paragraph. private async Task> ParseCoreAsync(string html, OpenXmlPartContainer hostingPart, IImageLoader imageLoader, - ParallelOptions parallelOptions) + ParallelOptions parallelOptions, + ParagraphStyleId? 
defaultParagraphStyleId = null) { if (string.IsNullOrWhiteSpace(html)) return []; @@ -256,6 +260,8 @@ private async Task> ParseCoreAsync(string h Expressions.HtmlDomExpression expression; if (hostingPart is MainDocumentPart) expression = new Expressions.BodyExpression(htmlDocument.Body!); + else if (defaultParagraphStyleId?.Val?.HasValue == true) + expression = new Expressions.BlockElementExpression(htmlDocument.Body!, defaultParagraphStyleId); else expression = new Expressions.BlockElementExpression(htmlDocument.Body!); diff --git a/src/Html2OpenXml/PredefinedStyles.cs b/src/Html2OpenXml/PredefinedStyles.cs index 4e0d5356..a7cf2a5a 100755 --- a/src/Html2OpenXml/PredefinedStyles.cs +++ b/src/Html2OpenXml/PredefinedStyles.cs @@ -22,6 +22,9 @@ internal class PredefinedStyles public const string Quote = "Quote"; public const string QuoteChar = "QuoteChar"; public const string TableGrid = "TableGrid"; + public const string Header = "Header"; + public const string Footer = "Footer"; + /// diff --git a/src/Html2OpenXml/Primitives/DefaultStyles.cs b/src/Html2OpenXml/Primitives/DefaultStyles.cs index ca57ac29..44d2a1ce 100644 --- a/src/Html2OpenXml/Primitives/DefaultStyles.cs +++ b/src/Html2OpenXml/Primitives/DefaultStyles.cs @@ -67,7 +67,7 @@ public class DefaultStyles public string ListParagraphStyle { get; set; } = PredefinedStyles.ListParagraph; /// - /// Default style for the <pre> table + /// Default style for the pre table /// /// TableGrid public string PreTableStyle { get; set; } = PredefinedStyles.TableGrid; @@ -89,4 +89,16 @@ public class DefaultStyles /// /// TableGrid public string TableStyle { get; set; } = PredefinedStyles.TableGrid; + + /// + /// Default style for header paragraphs. + /// + /// Header + public string HeaderStyle { get; set; } = PredefinedStyles.Header; + + /// + /// Default style for footer paragraphs. 
+ /// + /// Footer + public string FooterStyle { get; set; } = PredefinedStyles.Footer; } \ No newline at end of file diff --git a/test/HtmlToOpenXml.Tests/HeaderFooterTests.cs b/test/HtmlToOpenXml.Tests/HeaderFooterTests.cs index a5349951..1f399e6b 100644 --- a/test/HtmlToOpenXml.Tests/HeaderFooterTests.cs +++ b/test/HtmlToOpenXml.Tests/HeaderFooterTests.cs @@ -18,6 +18,10 @@ public async Task Header_ReturnsHeaderPartLinkedToBody() var headerPart = mainPart.HeaderParts?.FirstOrDefault(); Assert.That(headerPart, Is.Not.Null); Assert.That(headerPart.Header, Is.Not.Null); + var p = headerPart.Header.Elements(); + Assert.That(p, Is.Not.Empty); + Assert.That(p.Select(p => p.ParagraphProperties?.ParagraphStyleId?.Val?.Value), + Has.All.EqualTo(converter.HtmlStyles.DefaultStyles.HeaderStyle)); var sectionProperties = mainPart.Document.Body!.Elements(); Assert.That(sectionProperties, Is.Not.Empty); @@ -51,11 +55,20 @@ public async Task WithExistingHeader_Default_ReturnsOverridenHeaderPart() generatedDocument.Position = 0L; using WordprocessingDocument package = WordprocessingDocument.Open(generatedDocument, true); MainDocumentPart mainPart = package.MainDocumentPart!; - HtmlConverter converter = new(mainPart); + var sectionProperties = mainPart.Document.Body!.Elements(); + Assert.That(sectionProperties, Is.Not.Empty); + var headerRefs = sectionProperties.SelectMany(s => s.Elements()); + Assert.Multiple(() => + { + Assert.That(headerRefs.Count(), Is.EqualTo(1)); + Assert.That(headerRefs.Count(r => r.Type?.Value == HeaderFooterValues.Default), Is.EqualTo(1), "Default header exist"); + }); + + HtmlConverter converter = new(mainPart); await converter.ParseHeader("Header content"); - var sectionProperties = mainPart.Document.Body!.Elements(); + sectionProperties = mainPart.Document.Body!.Elements(); Assert.That(sectionProperties, Is.Not.Empty); Assert.That(sectionProperties.SelectMany(s => s.Elements()) .Count(r => r.Type?.Value == HeaderFooterValues.Default), Is.EqualTo(1)); 
@@ -72,16 +85,28 @@ public async Task WithExistingHeader_Even_ReturnsAnotherHeaderPart() generatedDocument.Position = 0L; using WordprocessingDocument package = WordprocessingDocument.Open(generatedDocument, true); MainDocumentPart mainPart = package.MainDocumentPart!; - HtmlConverter converter = new(mainPart); + var sectionProperties = mainPart.Document.Body!.Elements(); + Assert.That(sectionProperties, Is.Not.Empty); + var headerRefs = sectionProperties.SelectMany(s => s.Elements()); + Assert.Multiple(() => + { + Assert.That(headerRefs.Count(r => r.Type?.Value == HeaderFooterValues.Default), Is.EqualTo(1), "Default header exist"); + Assert.That(headerRefs.Count(r => r.Type?.Value == HeaderFooterValues.Even), Is.Zero, "No event header has been yet defined"); + }); + + HtmlConverter converter = new(mainPart); await converter.ParseHeader("Header even content", HeaderFooterValues.Even); - var sectionProperties = mainPart.Document.Body!.Elements(); + sectionProperties = mainPart.Document.Body!.Elements(); Assert.That(sectionProperties, Is.Not.Empty); Assert.That(sectionProperties.Count(s => s.HasChild()), Is.EqualTo(1)); - var headerRefs = sectionProperties.SelectMany(s => s.Elements()); - Assert.That(headerRefs.Count(r => r.Type?.Value == HeaderFooterValues.Default), Is.EqualTo(1)); - Assert.That(headerRefs.Count(r => r.Type?.Value == HeaderFooterValues.Even), Is.EqualTo(1)); + headerRefs = sectionProperties.SelectMany(s => s.Elements()); + Assert.Multiple(() => + { + Assert.That(headerRefs.Count(r => r.Type?.Value == HeaderFooterValues.Default), Is.EqualTo(1)); + Assert.That(headerRefs.Count(r => r.Type?.Value == HeaderFooterValues.Even), Is.EqualTo(1)); + }); AssertThatOpenXmlDocumentIsValid(); } }