diff --git a/CHANGELOG.md b/CHANGELOG.md
index 14e39bd1..a56d874d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,14 @@
# Changelog
+## 3.2.0
+
+- Add new public API to allow parsing into Header and Footer #162. Some API methods have been flagged as obsolete with a clear message of what to use instead.
+ This is not a breaking change as it keeps the existing behaviour.
+- Add support for `SVG` format (either from img src or the SVG node tag)
+- Automatically create the `_top` bookmark if needed
+- Fix a crash when a hyperlink contains both `img` and `figcaption`
+- Fix a crash when `li` is empty #161
+
## 3.1.1
- Fix respecting layout with `div`/`p` ending with line break #158
diff --git a/HtmlToOpenXml.sln b/HtmlToOpenXml.sln
index 6ed4cc54..18814542 100644
--- a/HtmlToOpenXml.sln
+++ b/HtmlToOpenXml.sln
@@ -1,7 +1,7 @@
Microsoft Visual Studio Solution File, Format Version 12.00
-# Visual Studio 15
-VisualStudioVersion = 15.0.26730.16
+# Visual Studio 17
+VisualStudioVersion = 17.8.34511.84
MinimumVisualStudioVersion = 10.0.40219.1
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "HtmlToOpenXml", "src\Html2OpenXml\HtmlToOpenXml.csproj", "{EF700F30-C9BB-49A6-912C-E3B77857B514}"
EndProject
@@ -9,9 +9,9 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{58520A98-BA5
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "test", "test", "{84EA02ED-2E97-47D2-992E-32CC104A3A7A}"
EndProject
-Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Demo", "examples\Demo\Demo.csproj", "{A1ECC760-B9F7-4A00-AF5F-568B5FD6F09F}"
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Demo", "examples\Demo\Demo.csproj", "{A1ECC760-B9F7-4A00-AF5F-568B5FD6F09F}"
EndProject
-Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "HtmlToOpenXml.Tests", "test\HtmlToOpenXml.Tests\HtmlToOpenXml.Tests.csproj", "{CA0A68E0-45A0-4A01-A061-F951D93D6906}"
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "HtmlToOpenXml.Tests", "test\HtmlToOpenXml.Tests\HtmlToOpenXml.Tests.csproj", "{CA0A68E0-45A0-4A01-A061-F951D93D6906}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
diff --git a/README.md b/README.md
index 92f3166f..529b26f1 100644
--- a/README.md
+++ b/README.md
@@ -2,9 +2,9 @@

[](https://github.com/onizet/html2openxml/blob/dev/LICENSE)
-# What is Html2OpenXml?
+# What is HtmlToOpenXml?
-Html2OpenXml is a small .Net library that convert simple or advanced HTML to plain OpenXml components. This program has started in 2009, initially to convert user's comments into Word.
+HtmlToOpenXml is a small .Net library that converts simple or advanced HTML to plain OpenXml components. This project started in 2009, initially to convert user's comments into Word.
This library supports both **.Net Framework 4.6.2**, **.NET Standard 2.0** and **.NET 8** which are all LTS.
@@ -28,7 +28,7 @@ Refer to [w3schools’ tag](http://www.w3schools.com/tags/default.asp) list to s
* `abbr` and `acronym`
* `b`, `i`, `u`, `s`, `del`, `ins`, `em`, `strike`, `strong`
* `br` and `hr`
-* `img`, `figcaption`
+* `img`, `figcaption` and `svg`
* `table`, `td`, `tr`, `th`, `tbody`, `thead`, `tfoot`, `caption` and `col`
* `cite`
* `div`, `span`, `time`, `font` and `p`
diff --git a/examples/Demo/Demo.csproj b/examples/Demo/Demo.csproj
index b869b3f9..3e76b9dc 100644
--- a/examples/Demo/Demo.csproj
+++ b/examples/Demo/Demo.csproj
@@ -6,7 +6,7 @@
-
+
@@ -14,6 +14,11 @@
+
+ $(DefineConstants);DEBUG
+ false
+
+
diff --git a/examples/Demo/Program.cs b/examples/Demo/Program.cs
index c3c54245..47c0124b 100644
--- a/examples/Demo/Program.cs
+++ b/examples/Demo/Program.cs
@@ -15,7 +15,7 @@ static class Program
static async Task Main(string[] args)
{
const string filename = "test.docx";
- string html = ResourceHelper.GetString("Resources.CompleteRunTest.html");
+ string html = ResourceHelper.GetString("Resources.AdvancedTable.html");
if (File.Exists(filename)) File.Delete(filename);
using (MemoryStream generatedDocument = new MemoryStream())
@@ -42,7 +42,7 @@ static async Task Main(string[] args)
converter.RenderPreAsTable = true;
Body body = mainPart.Document.Body;
- await converter.ParseHtml(html);
+ await converter.ParseBody(html);
mainPart.Document.Save();
AssertThatOpenXmlDocumentIsValid(package);
diff --git a/src/Html2OpenXml/Expressions/AbbreviationExpression.cs b/src/Html2OpenXml/Expressions/AbbreviationExpression.cs
index 654f1fb3..d0dcca86 100644
--- a/src/Html2OpenXml/Expressions/AbbreviationExpression.cs
+++ b/src/Html2OpenXml/Expressions/AbbreviationExpression.cs
@@ -32,8 +32,9 @@ public override IEnumerable Interpret(ParsingContext context)
var childElements = base.Interpret(context);
// Transform the inline acronym/abbreviation to a reference to a foot note.
+ // Footnotes and endnotes are invalid inside a header or footer
string? description = node.Title;
- if (string.IsNullOrEmpty(description))
+ if (string.IsNullOrEmpty(description) || context.HostingPart is not MainDocumentPart)
return childElements;
string runStyle;
diff --git a/src/Html2OpenXml/Expressions/BlockElementExpression.cs b/src/Html2OpenXml/Expressions/BlockElementExpression.cs
index de22330c..e15db508 100644
--- a/src/Html2OpenXml/Expressions/BlockElementExpression.cs
+++ b/src/Html2OpenXml/Expressions/BlockElementExpression.cs
@@ -325,7 +325,7 @@ private static Paragraph CreateParagraph(ParsingContext context, IList
/// Resolve the next available (they must be unique).
///
- private static int IncrementBookmarkId(ParsingContext context)
+ protected static int IncrementBookmarkId(ParsingContext context)
{
var bookmarkRef = context.Properties("bookmarkRef");
diff --git a/src/Html2OpenXml/Expressions/BlockQuoteExpression.cs b/src/Html2OpenXml/Expressions/BlockQuoteExpression.cs
index 18900c2c..3fa3d431 100644
--- a/src/Html2OpenXml/Expressions/BlockQuoteExpression.cs
+++ b/src/Html2OpenXml/Expressions/BlockQuoteExpression.cs
@@ -13,6 +13,7 @@
using System.Linq;
using AngleSharp.Html.Dom;
using DocumentFormat.OpenXml;
+using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;
namespace HtmlToOpenXml.Expressions;
@@ -26,15 +27,19 @@ sealed class BlockQuoteExpression(IHtmlElement node) : BlockElementExpression(no
///
public override IEnumerable Interpret(ParsingContext context)
{
- string? description = node.GetAttribute("cite");
-
var childElements = base.Interpret(context);
if (!childElements.Any())
return [];
+
+ // Footnotes and endnotes are invalid inside a header or footer
+ if (context.HostingPart is not MainDocumentPart)
+ return childElements;
// Transform the inline acronym/abbreviation to a reference to a foot note.
if (childElements.First() is Paragraph paragraph)
{
+ string? description = node.GetAttribute("cite");
+
paragraph.ParagraphProperties ??= new();
if (paragraph.ParagraphProperties.ParagraphStyleId is null)
paragraph.ParagraphProperties.ParagraphStyleId =
diff --git a/src/Html2OpenXml/Expressions/BodyExpression.cs b/src/Html2OpenXml/Expressions/BodyExpression.cs
index ed663484..7ff5ee16 100644
--- a/src/Html2OpenXml/Expressions/BodyExpression.cs
+++ b/src/Html2OpenXml/Expressions/BodyExpression.cs
@@ -10,6 +10,7 @@
* PARTICULAR PURPOSE.
*/
using System.Collections.Generic;
+using System.Globalization;
using System.Linq;
using AngleSharp.Dom;
using AngleSharp.Html.Dom;
@@ -24,17 +25,40 @@ namespace HtmlToOpenXml.Expressions;
///
sealed class BodyExpression(IHtmlElement node) : BlockElementExpression(node)
{
+ private bool shouldRegisterTopBookmark;
+
public override IEnumerable Interpret(ParsingContext context)
{
MarkAllBookmarks();
- return base.Interpret(context);
+ var elements = base.Interpret(context);
+
+ if (shouldRegisterTopBookmark && elements.Any())
+ {
+ // Check whether it already exists
+ var body = context.MainPart.Document.Body!;
+ if (body.Descendants().Where(b => b.Name?.Value == "_top").Any())
+ {
+ return elements;
+ }
+
+ var bookmarkId = IncrementBookmarkId(context).ToString(CultureInfo.InvariantCulture);
+ // this is expected to stand in the 1st paragraph
+ Paragraph? p = body.FirstChild as Paragraph;
+ p ??= body.PrependChild(new Paragraph());
+ p.InsertAfter(new BookmarkEnd() { Id = bookmarkId }, p.ParagraphProperties);
+ p.InsertAfter(new BookmarkStart() { Id = bookmarkId, Name = "_top" }, p.ParagraphProperties);
+ }
+
+ return elements;
}
protected override void ComposeStyles(ParsingContext context)
{
base.ComposeStyles(context);
+ var mainPart = context.MainPart;
+
// Unsupported W3C attribute but claimed by users. Specified at level, the page
// orientation is applied on the whole document
string? attr = styleAttributes!["page-orientation"];
@@ -42,10 +66,10 @@ protected override void ComposeStyles(ParsingContext context)
{
PageOrientationValues orientation = Converter.ToPageOrientation(attr);
- var sectionProperties = context.MainPart.Document.Body!.GetFirstChild();
+ var sectionProperties = mainPart.Document.Body!.GetFirstChild();
if (sectionProperties == null || sectionProperties.GetFirstChild() == null)
{
- context.MainPart.Document.Body.Append(ChangePageOrientation(orientation));
+ mainPart.Document.Body.Append(ChangePageOrientation(orientation));
}
else
{
@@ -61,10 +85,10 @@ protected override void ComposeStyles(ParsingContext context)
if (paraProperties.BiDi is not null)
{
- var sectionProperties = context.MainPart.Document.Body!.GetFirstChild();
+ var sectionProperties = mainPart.Document.Body!.GetFirstChild();
if (sectionProperties == null || sectionProperties.GetFirstChild() == null)
{
- context.MainPart.Document.Body.Append(sectionProperties = new());
+ mainPart.Document.Body.Append(sectionProperties = new());
}
sectionProperties.AddChild(paraProperties.BiDi.CloneNode(true));
@@ -105,10 +129,17 @@ private void MarkAllBookmarks()
var links = node.QuerySelectorAll("a[href^='#']");
if (links.Length == 0) return;
- foreach (var link in links.Cast())
+ foreach (var link in links.Cast().Where(l => l.Hash.Length > 0))
{
+ if (link.IsTopAnchor())
+ {
+ shouldRegisterTopBookmark = true;
+ return;
+ }
+
var id = link.Hash.Substring(1);
var target = node.Owner!.GetElementById(id);
+
// `id` attribute is preferred but `name` is also valid
target ??= node.Owner!.GetElementsByName(id).FirstOrDefault();
diff --git a/src/Html2OpenXml/Expressions/HtmlDomExpression.cs b/src/Html2OpenXml/Expressions/HtmlDomExpression.cs
index a60c6033..44ec6bc0 100644
--- a/src/Html2OpenXml/Expressions/HtmlDomExpression.cs
+++ b/src/Html2OpenXml/Expressions/HtmlDomExpression.cs
@@ -67,6 +67,7 @@ private static Dictionary> InitKnownTa
{ TagNames.Strong, el => new PhrasingElementExpression((IHtmlElement) el, new Bold()) },
{ TagNames.Sub, el => new PhrasingElementExpression((IHtmlElement) el, new VerticalTextAlignment() { Val = VerticalPositionValues.Subscript }) },
{ TagNames.Sup, el => new PhrasingElementExpression((IHtmlElement) el, new VerticalTextAlignment() { Val = VerticalPositionValues.Superscript }) },
+ { TagNames.Svg, el => new SvgExpression((AngleSharp.Svg.Dom.ISvgSvgElement) el) },
{ TagNames.Table, el => new TableExpression((IHtmlTableElement) el) },
{ TagNames.Time, el => new PhrasingElementExpression((IHtmlElement) el) },
{ TagNames.U, el => new PhrasingElementExpression((IHtmlElement) el, new Underline() { Val = UnderlineValues.Single }) },
diff --git a/src/Html2OpenXml/Expressions/HyperlinkExpression.cs b/src/Html2OpenXml/Expressions/HyperlinkExpression.cs
index 2e11edd6..46fdc9a4 100644
--- a/src/Html2OpenXml/Expressions/HyperlinkExpression.cs
+++ b/src/Html2OpenXml/Expressions/HyperlinkExpression.cs
@@ -14,6 +14,7 @@
using System.Linq;
using AngleSharp.Html.Dom;
using DocumentFormat.OpenXml;
+using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;
namespace HtmlToOpenXml.Expressions;
@@ -42,8 +43,10 @@ public override IEnumerable Interpret (ParsingContext context)
// Let's see whether the link tag include an image inside its body.
// If so, the Hyperlink OpenXmlElement is lost and we'll keep only the images
// and applied a HyperlinkOnClick attribute.
- var imagesInLink = childElements.Where(e => e.HasChild());
- if (imagesInLink.Any())
+ IEnumerable imagesInLink;
+ // Clickable image is only supported in body but not in header/footer
+ if (context.HostingPart is MainDocumentPart &&
+ (imagesInLink = childElements.Where(e => e.HasChild())).Any())
{
foreach (var img in imagesInLink)
{
@@ -56,25 +59,42 @@ public override IEnumerable Interpret (ParsingContext context)
if (enDp.MoveNext()) alt = enDp.Current.Description;
else alt = null;
- d.InsertInDocProperties(
- new a.HyperlinkOnClick() { Id = h.Id ?? h.Anchor, Tooltip = alt });
+ d.Inline ??= new a.Wordprocessing.Inline();
+ d.Inline.DocProperties ??= new a.Wordprocessing.DocProperties();
+
+ if (h.Anchor == "_top")
+ {
+ // exception case: a clickable image requires the _top bookmark to be registered with a relationship
+ var extLink = context.HostingPart.AddHyperlinkRelationship(new Uri("#_top", UriKind.Relative), false);
+ d.Inline.DocProperties.Append(
+ new a.HyperlinkOnClick() { Id = extLink.Id, Tooltip = alt });
+ }
+ else
+ {
+ d.Inline.DocProperties.Append(
+ new a.HyperlinkOnClick() { Id = h.Id ?? h.Anchor, Tooltip = alt });
+ }
}
}
// can't use GetFirstChild or we may find the one containing the image
+ List runs = [];
foreach (var el in childElements)
{
- if (el is Run run && !run.HasChild())
- {
- run.RunProperties ??= new();
- run.RunProperties.RunStyle = context.DocumentStyle.GetRunStyle(
- context.DocumentStyle.DefaultStyles.HyperlinkStyle);
- break;
- }
+ if (el is Run r) runs.Add(r);
+ // unroll paragraphs. CloneNode is needed to detach the run from its parent
+ else runs.AddRange(el.Elements().Select(r => (Run) r.CloneNode(true)));
+ }
+
+ foreach (var run in runs.Where(run => !run.HasChild()))
+ {
+ run.RunProperties ??= new();
+ run.RunProperties.RunStyle = context.DocumentStyle.GetRunStyle(
+ context.DocumentStyle.DefaultStyles.HyperlinkStyle);
}
// Append the processed elements and put them to the Run of the Hyperlink
- h.Append(childElements);
+ h.Append(runs);
return [h];
}
@@ -87,20 +107,21 @@ public override IEnumerable Interpret (ParsingContext context)
if (string.IsNullOrEmpty(att))
return null;
+ // Always accept _top anchor
+ if (linkNode.IsTopAnchor())
+ {
+ h = new Hyperlink() { History = true, Anchor = "_top" };
+ }
// is it an anchor?
- if (att![0] == '#' && att.Length > 1)
+ else if (!context.Converter.ExcludeLinkAnchor && linkNode.Hash.Length > 1 && linkNode.Hash[0] == '#')
{
- // Always accept _top anchor
- if (!context.Converter.ExcludeLinkAnchor || att == "#_top")
- {
- h = new Hyperlink(
- ) { History = true, Anchor = att.Substring(1) };
- }
+ h = new Hyperlink(
+ ) { History = true, Anchor = linkNode.Hash.Substring(1) };
}
// ensure the links does not start with javascript:
else if (AngleSharpExtensions.TryParseUrl(att, UriKind.Absolute, out var uri))
{
- var extLink = context.MainPart.AddHyperlinkRelationship(uri!, true);
+ var extLink = context.HostingPart.AddHyperlinkRelationship(uri!, true);
h = new Hyperlink(
) { History = true, Id = extLink.Id };
diff --git a/src/Html2OpenXml/Expressions/Image/ImageExpression.cs b/src/Html2OpenXml/Expressions/Image/ImageExpression.cs
index 52b3e2f1..a4b61c30 100644
--- a/src/Html2OpenXml/Expressions/Image/ImageExpression.cs
+++ b/src/Html2OpenXml/Expressions/Image/ImageExpression.cs
@@ -11,8 +11,11 @@
*/
using System;
using System.Threading;
+using AngleSharp.Dom;
using AngleSharp.Html.Dom;
+using AngleSharp.Svg.Dom;
using DocumentFormat.OpenXml;
+using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;
using HtmlToOpenXml.IO;
@@ -57,14 +60,24 @@ class ImageExpression(IHtmlImageElement node) : ImageExpressionBase(node)
preferredSize.Height = imgNode.DisplayHeight;
}
- var (imageObjId, drawingObjId) = IncrementDrawingObjId(context);
-
- HtmlImageInfo? iinfo = context.Converter.ImagePrefetcher.Download(src, CancellationToken.None)
+ HtmlImageInfo? iinfo = context.ImageLoader.Download(src, CancellationToken.None)
.ConfigureAwait(false).GetAwaiter().GetResult();
if (iinfo == null)
return null;
+ if (iinfo.TypeInfo == ImagePartType.Svg)
+ {
+ var imagePart = context.HostingPart.GetPartById(iinfo.ImagePartId);
+ using var stream = imagePart.GetStream(System.IO.FileMode.Open);
+ using var sreader = new System.IO.StreamReader(stream);
+ imgNode.Insert(AdjacentPosition.AfterBegin, sreader.ReadToEnd());
+
+ var svgNode = imgNode.FindChild();
+ if (svgNode is null) return null;
+ return SvgExpression.CreateSvgDrawing(context, svgNode, iinfo.ImagePartId, preferredSize);
+ }
+
if (preferredSize.IsEmpty)
{
preferredSize = iinfo.Size;
@@ -78,6 +91,7 @@ class ImageExpression(IHtmlImageElement node) : ImageExpressionBase(node)
long widthInEmus = new Unit(UnitMetric.Pixel, preferredSize.Width).ValueInEmus;
long heightInEmus = new Unit(UnitMetric.Pixel, preferredSize.Height).ValueInEmus;
+ var (imageObjId, drawingObjId) = IncrementDrawingObjId(context);
var img = new Drawing(
new wp.Inline(
new wp.Extent() { Cx = widthInEmus, Cy = heightInEmus },
diff --git a/src/Html2OpenXml/Expressions/Image/ImageExpressionBase.cs b/src/Html2OpenXml/Expressions/Image/ImageExpressionBase.cs
index 170b0464..3cf0422c 100644
--- a/src/Html2OpenXml/Expressions/Image/ImageExpressionBase.cs
+++ b/src/Html2OpenXml/Expressions/Image/ImageExpressionBase.cs
@@ -12,6 +12,7 @@
using System.Collections.Generic;
using System.Linq;
using DocumentFormat.OpenXml;
+using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;
using a = DocumentFormat.OpenXml.Drawing;
@@ -85,10 +86,11 @@ internal static (uint imageObjId, uint drawingObjId) IncrementDrawingObjId(Parsi
drawingObjId ??= 1; // 1 is the minimum ID set by MS Office.
imageObjId ??= 1;
+ var mainPart = context.MainPart;
foreach (var part in new[] {
- context.MainPart.Document.Body!.Descendants(),
- context.MainPart.HeaderParts.Where(f => f.Header != null).SelectMany(f => f.Header.Descendants()),
- context.MainPart.FooterParts.Where(f => f.Footer != null).SelectMany(f => f.Footer.Descendants())
+ mainPart.Document.Body!.Descendants(),
+ mainPart.HeaderParts.Where(f => f.Header != null).SelectMany(f => f.Header.Descendants()),
+ mainPart.FooterParts.Where(f => f.Footer != null).SelectMany(f => f.Footer.Descendants())
})
foreach (Drawing d in part)
{
diff --git a/src/Html2OpenXml/Expressions/Image/SvgExpression.cs b/src/Html2OpenXml/Expressions/Image/SvgExpression.cs
new file mode 100644
index 00000000..7bb9a7f6
--- /dev/null
+++ b/src/Html2OpenXml/Expressions/Image/SvgExpression.cs
@@ -0,0 +1,105 @@
+/* Copyright (C) Olivier Nizet https://github.com/onizet/html2openxml - All Rights Reserved
+ *
+ * This source is subject to the Microsoft Permissive License.
+ * Please see the License.txt file for more information.
+ * All other rights reserved.
+ *
+ * THIS CODE AND INFORMATION ARE PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
+ * KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
+ * PARTICULAR PURPOSE.
+ */
+using AngleSharp.Svg.Dom;
+using DocumentFormat.OpenXml;
+using DocumentFormat.OpenXml.Packaging;
+using DocumentFormat.OpenXml.Wordprocessing;
+using DocumentFormat.OpenXml.Office2019.Drawing.SVG;
+using System.Text;
+
+using a = DocumentFormat.OpenXml.Drawing;
+using pic = DocumentFormat.OpenXml.Drawing.Pictures;
+using wp = DocumentFormat.OpenXml.Drawing.Wordprocessing;
+using AngleSharp.Text;
+
+namespace HtmlToOpenXml.Expressions;
+
+///
+/// Process the parsing of a svg element.
+///
+sealed class SvgExpression(ISvgSvgElement node) : ImageExpressionBase(node)
+{
+ private readonly ISvgSvgElement svgNode = node;
+
+
+ protected override Drawing? CreateDrawing(ParsingContext context)
+ {
+ var imgPart = context.MainPart.AddImagePart(ImagePartType.Svg);
+ using var stream = new System.IO.MemoryStream(Encoding.UTF8.GetBytes(svgNode.OuterHtml), writable: false);
+ imgPart.FeedData(stream);
+ var imagePartId = context.MainPart.GetIdOfPart(imgPart);
+ return CreateSvgDrawing(context, svgNode, imagePartId, Size.Empty);
+ }
+
+ internal static Drawing CreateSvgDrawing(ParsingContext context, ISvgSvgElement svgNode, string imagePartId, Size preferredSize)
+ {
+ var width = Unit.Parse(svgNode.GetAttribute("width"));
+ var height = Unit.Parse(svgNode.GetAttribute("height"));
+ long widthInEmus, heightInEmus;
+ if (width.IsValid && height.IsValid)
+ {
+ widthInEmus = width.ValueInEmus;
+ heightInEmus = height.ValueInEmus;
+ }
+ else
+ {
+ widthInEmus = new Unit(UnitMetric.Pixel, preferredSize.Width).ValueInEmus;
+ heightInEmus = new Unit(UnitMetric.Pixel, preferredSize.Height).ValueInEmus;
+ }
+
+ var (imageObjId, drawingObjId) = IncrementDrawingObjId(context);
+
+ string? title = svgNode.QuerySelector("title")?.TextContent?.CollapseAndStrip() ?? "Picture " + imageObjId;
+ string? description = svgNode.QuerySelector("desc")?.TextContent?.CollapseAndStrip() ?? string.Empty;
+
+ var img = new Drawing(
+ new wp.Inline(
+ new wp.Extent() { Cx = widthInEmus, Cy = heightInEmus },
+ new wp.EffectExtent() { LeftEdge = 0L, TopEdge = 0L, RightEdge = 0L, BottomEdge = 0L },
+ new wp.DocProperties() { Id = drawingObjId, Name = title, Description = description },
+ new wp.NonVisualGraphicFrameDrawingProperties {
+ GraphicFrameLocks = new a.GraphicFrameLocks() { NoChangeAspect = true }
+ },
+ new a.Graphic(
+ new a.GraphicData(
+ new pic.Picture(
+ new pic.NonVisualPictureProperties {
+ NonVisualDrawingProperties = new pic.NonVisualDrawingProperties() {
+ Id = imageObjId, Name = title
+ },
+ NonVisualPictureDrawingProperties = new()
+ },
+ new pic.BlipFill(
+ new a.Blip(
+ new a.BlipExtensionList(
+ new a.BlipExtension(new SVGBlip { Embed = imagePartId }) {
+ Uri = "{96DAC541-7B7A-43D3-8B79-37D633B846F1}"
+ })
+ ) { Embed = imagePartId /* ideally, that should be a png representation of the svg */ },
+ new a.Stretch(
+ new a.FillRectangle())
+ ),
+ new pic.ShapeProperties(
+ new a.Transform2D(
+ new a.Offset() { X = 0L, Y = 0L },
+ new a.Extents() { Cx = widthInEmus, Cy = heightInEmus }),
+ new a.PresetGeometry(
+ new a.AdjustValueList()
+ ) { Preset = a.ShapeTypeValues.Rectangle })
+ )
+ ) { Uri = "http://schemas.openxmlformats.org/drawingml/2006/picture" })
+ ) { DistanceFromTop = (UInt32Value)0U, DistanceFromBottom = (UInt32Value)0U, DistanceFromLeft = (UInt32Value)0U, DistanceFromRight = (UInt32Value)0U }
+ );
+
+ return img;
+ }
+}
\ No newline at end of file
diff --git a/src/Html2OpenXml/Expressions/Numbering/ListExpression.cs b/src/Html2OpenXml/Expressions/Numbering/ListExpression.cs
index 87d3a3e6..8bb2a369 100644
--- a/src/Html2OpenXml/Expressions/Numbering/ListExpression.cs
+++ b/src/Html2OpenXml/Expressions/Numbering/ListExpression.cs
@@ -78,6 +78,7 @@ public override IEnumerable Interpret(ParsingContext context)
{
var expression = new BlockElementExpression(liNode);
var childElements = expression.Interpret(context);
+ if (!childElements.Any()) continue;
Paragraph p = (Paragraph) childElements.First();
p.ParagraphProperties ??= new();
diff --git a/src/Html2OpenXml/HtmlConverter.cs b/src/Html2OpenXml/HtmlConverter.cs
index 5b64b31b..8531f7ed 100755
--- a/src/Html2OpenXml/HtmlConverter.cs
+++ b/src/Html2OpenXml/HtmlConverter.cs
@@ -28,8 +28,8 @@ namespace HtmlToOpenXml;
public partial class HtmlConverter
{
private readonly MainDocumentPart mainPart;
- /// Cache all the ImagePart processed to avoid downloading the same image.
- private ImagePrefetcher? imagePrefetcher;
+ // Cache all the ImagePart processed to avoid downloading the same image
+ private IImageLoader? headerImageLoader, bodyImageLoader, footerImageLoader;
private readonly WordDocumentStyle htmlStyles;
private readonly IWebRequest webRequester;
@@ -57,79 +57,135 @@ public HtmlConverter(MainDocumentPart mainPart, IWebRequest? webRequester = null
}
///
- /// Start the parse processing.
+ /// Parse some HTML content where the output is intended to be inserted in the main document part.
///
/// The HTML content to parse
/// Returns a list of parsed paragraph.
public IList Parse(string html)
{
- return Parse(html, CancellationToken.None).ConfigureAwait(false).GetAwaiter().GetResult().ToList();
+ bodyImageLoader ??= new ImagePrefetcher(mainPart, webRequester);
+ return ParseCoreAsync(html, mainPart, bodyImageLoader,
+ new ParallelOptions() { CancellationToken = CancellationToken.None })
+ .ConfigureAwait(false).GetAwaiter().GetResult().ToList();
}
///
- /// Start the parse processing.
+ /// Start the asynchronous parse processing where the output is intended to be inserted in the main document part.
///
/// The HTML content to parse
/// The cancellation token.
/// Returns a list of parsed paragraph.
+ [Obsolete("Use ParseAsync instead to respect naming convention")]
+ [System.Diagnostics.CodeAnalysis.ExcludeFromCodeCoverage]
public Task> Parse(string html, CancellationToken cancellationToken = default)
{
- return Parse(html, new ParallelOptions() { CancellationToken = cancellationToken });
+ return ParseAsync(html, cancellationToken);
}
///
- /// Start the parse processing. Use this overload if you want to control the downloading of images.
+ /// Start the asynchronous parse processing where the output is intended to be inserted in the main document part.
+ ///
+ /// The HTML content to parse
+ /// The cancellation token.
+ /// Returns a list of parsed paragraph.
+ public Task> ParseAsync(string html, CancellationToken cancellationToken = default)
+ {
+ return ParseAsync(html, new ParallelOptions { CancellationToken = cancellationToken });
+ }
+
+ ///
+ /// Start the asynchronous parse processing where the output is intended to be inserted in the main document part.
///
/// The HTML content to parse
/// The configuration of parallelism while downloading the remote resources.
/// Returns a list of parsed paragraph.
- public async Task> Parse(string html, ParallelOptions parallelOptions)
+ public Task> ParseAsync(string html, ParallelOptions parallelOptions)
{
- if (string.IsNullOrWhiteSpace(html))
- return [];
+ bodyImageLoader ??= new ImagePrefetcher(mainPart, webRequester);
- // ensure a body exists to avoid any errors when trying to access it
- if (mainPart.Document == null)
- new Document(new Body()).Save(mainPart);
- else if (mainPart.Document.Body == null)
- mainPart.Document.Body = new Body();
+ return ParseCoreAsync(html, mainPart, bodyImageLoader, parallelOptions);
+ }
- var browsingContext = BrowsingContext.New();
- var htmlDocument = await browsingContext.OpenAsync(req => req.Content(html), parallelOptions.CancellationToken);
- if (htmlDocument == null)
- return [];
+ ///
+ /// Asynchronously parse the HTML and append the output to the Header of the document.
+ ///
+ /// The HTML content to parse
+ /// Determines the page(s) on which the current header shall be displayed.
+ /// If omitted, the Default value is used.
+ /// The cancellation token.
+ ///
+ public async Task ParseHeader(string html, HeaderFooterValues? headerType = null,
+ CancellationToken cancellationToken = default)
+ {
+ headerType ??= HeaderFooterValues.Default;
+ var headerPart = ResolveHeaderFooterPart(headerType);
- await PreloadImages(htmlDocument, parallelOptions).ConfigureAwait(false);
+ headerPart.Header ??= new();
+ headerImageLoader ??= new ImagePrefetcher(headerPart, webRequester);
- var parsingContext = new ParsingContext(this, mainPart);
- var body = new Expressions.BodyExpression (htmlDocument.Body!);
- var paragraphs = body.Interpret (parsingContext);
- return paragraphs.Cast();
+ var paragraphs = await ParseCoreAsync(html, headerPart, headerImageLoader,
+ new ParallelOptions() { CancellationToken = cancellationToken },
+ htmlStyles.GetParagraphStyle(htmlStyles.DefaultStyles.HeaderStyle));
+
+ foreach (var p in paragraphs)
+ headerPart.Header.AddChild(p);
}
///
- /// Start the parse processing and append the converted paragraphs into the Body of the document.
+ /// Asynchronously parse the HTML and append the output to the Footer of the document.
///
/// The HTML content to parse
+ /// Determines the page(s) on which the current footer shall be displayed.
+ /// If omitted, the Default value is used.
/// The cancellation token.
- public async Task ParseHtml(string html, CancellationToken cancellationToken = default)
+ ///
+ public async Task ParseFooter(string html, HeaderFooterValues? footerType = null,
+ CancellationToken cancellationToken = default)
{
- // This method exists because we may ensure the SectionProperties remains the last element of the body.
- // It's mandatory when dealing with page orientation
+ footerType ??= HeaderFooterValues.Default;
+ var footerPart = ResolveHeaderFooterPart(footerType);
+
+ footerPart.Footer ??= new();
+ footerImageLoader ??= new ImagePrefetcher(footerPart, webRequester);
+
+ var paragraphs = await ParseCoreAsync(html, footerPart, footerImageLoader,
+ new ParallelOptions() { CancellationToken = cancellationToken },
+ htmlStyles.GetParagraphStyle(htmlStyles.DefaultStyles.FooterStyle));
+
+ foreach (var p in paragraphs)
+ footerPart.Footer.AddChild(p);
+ }
- var paragraphs = await Parse(html, cancellationToken);
+ ///
+ /// Asynchronously parse the HTML and append the output to the Body of the document.
+ ///
+ /// The HTML content to parse
+ /// The cancellation token.
+ ///
+ public async Task ParseBody(string html, CancellationToken cancellationToken = default)
+ {
+ bodyImageLoader ??= new ImagePrefetcher(mainPart, webRequester);
+ var paragraphs = await ParseCoreAsync(html, mainPart, bodyImageLoader,
+ new ParallelOptions() { CancellationToken = cancellationToken });
- Body body = mainPart.Document.Body!;
+ if (!paragraphs.Any())
+ return;
+
+ Body body = mainPart.Document!.Body!;
SectionProperties? sectionProperties = body.GetLastChild();
foreach (var para in paragraphs)
body.Append(para);
- // move the paragraph with BookmarkStart `_GoBack` as the last child
+ // We automatically create the _top bookmark if missing. To avoid leaving an empty paragraph,
+ // try to merge it with the next paragraph.
var p = body.GetFirstChild();
- if (p != null && p.GetFirstChild()?.Id == "_GoBack")
+ if (p != null && p.GetFirstChild()?.Name == "_top"
+ && !p.HasChild()
+ && p.NextSibling() is Paragraph nextPara)
{
+ nextPara.PrependChild(p.GetFirstChild()?.CloneNode(false));
+ nextPara.PrependChild(p.GetFirstChild()!.CloneNode(false));
p.Remove();
- body.Append(p);
}
// Push the sectionProperties as the last element of the Body
@@ -141,6 +197,33 @@ public async Task ParseHtml(string html, CancellationToken cancellationToken = d
}
}
+ ///
+ /// Start the asynchronous parse processing. Use this overload if you want to control the downloading of images.
+ ///
+ /// The HTML content to parse
+ /// The configuration of parallelism while downloading the remote resources.
+ /// Returns a list of parsed paragraph.
+ [Obsolete("Use ParseAsync instead to respect naming convention")]
+ [System.Diagnostics.CodeAnalysis.ExcludeFromCodeCoverage]
+ public Task> Parse(string html, ParallelOptions parallelOptions)
+ {
+ bodyImageLoader ??= new ImagePrefetcher(mainPart, webRequester);
+
+ return ParseCoreAsync(html, mainPart, bodyImageLoader, parallelOptions);
+ }
+
+ ///
+ /// Start the asynchronous parse processing and append the output to the Body of the document.
+ ///
+ /// The HTML content to parse
+ /// The cancellation token.
+ [Obsolete("Use ParseBody instead for output clarification")]
+ [System.Diagnostics.CodeAnalysis.ExcludeFromCodeCoverage]
+ public Task ParseHtml(string html, CancellationToken cancellationToken = default)
+ {
+ return ParseBody(html, cancellationToken);
+ }
+
///
/// Refresh the cache of styles presents in the document.
///
@@ -149,10 +232,53 @@ public void RefreshStyles()
htmlStyles.PrepareStyles(mainPart);
}
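+ /// <summary>
+ /// Start the asynchronous parse processing. Use this overload if you want to control the downloading of images.
+ /// </summary>
+ /// <param name="html">The HTML content to parse</param>
+ /// <param name="hostingPart">The OpenXml container where the content will be inserted.</param>
+ /// <param name="imageLoader">The image resolver service linked to the <paramref name="hostingPart"/>.</param>
+ /// <param name="parallelOptions">The configuration of parallelism while downloading the remote resources.</param>
+ /// <param name="defaultParagraphStyleId">The default OpenXml style to apply on paragraphs.</param>
+ /// <returns>Returns a list of parsed paragraphs.</returns>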
+ private async Task> ParseCoreAsync(string html,
+ OpenXmlPartContainer hostingPart, IImageLoader imageLoader,
+ ParallelOptions parallelOptions,
+ ParagraphStyleId? defaultParagraphStyleId = null)
+ {
+ if (string.IsNullOrWhiteSpace(html))
+ return [];
+
+ var browsingContext = BrowsingContext.New();
+ var htmlDocument = await browsingContext.OpenAsync(req => req.Content(html), parallelOptions.CancellationToken).ConfigureAwait(false);
+ if (htmlDocument == null)
+ return [];
+
+ if (mainPart.Document == null)
+ new Document(new Body()).Save(mainPart);
+ else if (mainPart.Document.Body == null)
+ mainPart.Document.Body = new Body();
+
+ await PreloadImages(htmlDocument, imageLoader, parallelOptions).ConfigureAwait(false);
+
+ Expressions.HtmlDomExpression expression;
+ if (hostingPart is MainDocumentPart)
+ expression = new Expressions.BodyExpression(htmlDocument.Body!);
+ else if (defaultParagraphStyleId?.Val?.HasValue == true)
+ expression = new Expressions.BlockElementExpression(htmlDocument.Body!, defaultParagraphStyleId);
+ else
+ expression = new Expressions.BlockElementExpression(htmlDocument.Body!);
+
+ var parsingContext = new ParsingContext(this, hostingPart, imageLoader);
+ var paragraphs = expression.Interpret(parsingContext);
+ return paragraphs.Cast();
+ }
+
///
/// Walk through all the img tags and preload all the remote images.
///
- private async Task PreloadImages(AngleSharp.Dom.IDocument htmlDocument, ParallelOptions parallelOptions)
+ private async Task PreloadImages(AngleSharp.Dom.IDocument htmlDocument,
+ IImageLoader imageLoader, ParallelOptions parallelOptions)
{
var imageUris = htmlDocument.QuerySelectorAll("img[src]")
.Cast()
@@ -162,10 +288,50 @@ private async Task PreloadImages(AngleSharp.Dom.IDocument htmlDocument, Parallel
return;
await imageUris.ForEachAsync(
- async (img, cts) => await ImagePrefetcher.Download(img, cts),
+ async (img, cts) => await imageLoader.Download(img, cts),
parallelOptions).ConfigureAwait(false);
}
+ /// <summary>
+ /// Create or resolve the header/footer part related to the <paramref name="type"/>.
+ /// </summary>
+ private TPart ResolveHeaderFooterPart(HeaderFooterValues? type)
+ where TPart: OpenXmlPart, IFixedContentTypePart
+ where TRefType: HeaderFooterReferenceType, new()
+ {
+ bool wasRefSet = false;
+ TPart? part = null;
+
+ var sectionProps = mainPart.Document.Body!.Elements();
+ if (!sectionProps.Any())
+ {
+ sectionProps = [new SectionProperties()];
+ mainPart.Document.Body!.AddChild(sectionProps.First());
+ }
+ else
+ {
+ var reference = sectionProps.SelectMany(sectPr => sectPr.Elements())
+ .Where(r => r.Id?.HasValue == true)
+ .FirstOrDefault(r => r.Type?.Value == type);
+
+ if (reference != null)
+ part = (TPart) mainPart.GetPartById(reference.Id!);
+ wasRefSet = part is not null;
+ }
+
+ part ??= mainPart.AddNewPart();
+
+ if (!wasRefSet)
+ {
+ sectionProps.First().PrependChild(new TRefType() {
+ Id = mainPart.GetIdOfPart(part),
+ Type = type
+ });
+ }
+
+ return part;
+ }
+
//____________________________________________________________________
//
// Configuration
@@ -216,10 +382,10 @@ public WordDocumentStyle HtmlStyles
public bool ContinueNumbering { get; set; } = true;
///
- /// Resolve a remote or inline image resource.
+ /// Gets the mainDocumentPart of the destination OpenXml document.
///
- internal ImagePrefetcher ImagePrefetcher
+ internal MainDocumentPart MainPart
{
- get => imagePrefetcher ??= new ImagePrefetcher(mainPart, webRequester);
+ get => mainPart;
}
}
diff --git a/src/Html2OpenXml/HtmlToOpenXml.csproj b/src/Html2OpenXml/HtmlToOpenXml.csproj
index 34e3b321..b9475508 100644
--- a/src/Html2OpenXml/HtmlToOpenXml.csproj
+++ b/src/Html2OpenXml/HtmlToOpenXml.csproj
@@ -9,13 +9,13 @@
HtmlToOpenXml
HtmlToOpenXml
HtmlToOpenXml.dll
- 3.1.1
+ 3.2.0
icon.png
Copyright 2009-$([System.DateTime]::Now.Year) Olivier Nizet
See changelog https://github.com/onizet/html2openxml/blob/master/CHANGELOG.md
README.md
office openxml netcore html
- 3.1.1
+ 3.2.0
MIT
https://github.com/onizet/html2openxml
https://github.com/onizet/html2openxml
@@ -45,11 +45,15 @@
-
+
+
+ $(DefineConstants);DEBUG
+ false
+
true
snupkg
diff --git a/src/Html2OpenXml/IO/DefaultWebRequest.cs b/src/Html2OpenXml/IO/DefaultWebRequest.cs
index 2c85335c..9414a02b 100644
--- a/src/Html2OpenXml/IO/DefaultWebRequest.cs
+++ b/src/Html2OpenXml/IO/DefaultWebRequest.cs
@@ -63,7 +63,17 @@ public DefaultWebRequest(HttpClient httpClient, ILogger? logger = null)
requestUri = UrlCombine(BaseImageUrl, requestUri.OriginalString);
}
- if (requestUri.IsFile)
+ bool isLocalFile;
+ try
+ {
+ isLocalFile = requestUri.IsFile;
+ }
+ catch (InvalidOperationException)
+ {
+ isLocalFile = false;
+ }
+
+ if (isLocalFile)
{
return DownloadLocalFile(requestUri, cancellationToken);
}
@@ -108,6 +118,9 @@ public DefaultWebRequest(HttpClient httpClient, ILogger? logger = null)
{
logger?.LogDebug("Downloading remote file: {0}", requestUri);
+ if (httpClient.BaseAddress is null && !requestUri.IsAbsoluteUri)
+ return null;
+
var response = await httpClient.GetAsync(requestUri, cancellationToken).ConfigureAwait(false);
if (response == null) return null;
resource.StatusCode = response.StatusCode;
diff --git a/src/Html2OpenXml/IO/ImageHeader.cs b/src/Html2OpenXml/IO/ImageHeader.cs
index 762c9c90..858a923d 100755
--- a/src/Html2OpenXml/IO/ImageHeader.cs
+++ b/src/Html2OpenXml/IO/ImageHeader.cs
@@ -18,6 +18,7 @@
using System.IO;
using System.Linq;
using System.Text;
+using System.Xml.XPath;
namespace HtmlToOpenXml.IO;
@@ -29,7 +30,7 @@ public static class ImageHeader
// https://en.wikipedia.org/wiki/List_of_file_signatures
#pragma warning disable CS1591 // Missing XML comment for publicly visible type or member
- public enum FileType { Unrecognized, Bitmap, Gif, Png, Jpeg, Emf }
+ public enum FileType { Unrecognized, Bitmap, Gif, Png, Jpeg, Emf, Xml }
#pragma warning restore CS1591 // Missing XML comment for publicly visible type or member
private static readonly byte[] pngSignatureBytes = [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A];
@@ -41,7 +42,8 @@ public enum FileType { Unrecognized, Bitmap, Gif, Png, Jpeg, Emf }
{ Encoding.UTF8.GetBytes("GIF89a"), FileType.Gif }, // animated gif
{ pngSignatureBytes, FileType.Png },
{ new byte[] { 0xff, 0xd8 }, FileType.Jpeg },
- { new byte[] { 0x1, 0, 0, 0 }, FileType.Emf }
+ { new byte[] { 0x1, 0, 0, 0 }, FileType.Emf },
+ { Encoding.UTF8.GetBytes("
- /// Download and provison the metadata of a requested image.
+ /// Download the remote or local image located at the specified url.
///
- sealed class ImagePrefetcher
+ Task Download(string imageUri, CancellationToken cancellationToken);
+}
+
+/// <summary>
+/// Download and provision the metadata of a requested image.
+/// </summary>
+sealed class ImagePrefetcher : IImageLoader
+ where T: OpenXmlPartContainer, ISupportedRelationship
+{
+ // Map extension to PartTypeInfo
+ private static readonly Dictionary knownExtensions = new(StringComparer.OrdinalIgnoreCase) {
+ { ".gif", ImagePartType.Gif },
+ { ".bmp", ImagePartType.Bmp },
+ { ".emf", ImagePartType.Emf },
+ { ".ico", ImagePartType.Icon },
+ { ".jp2", ImagePartType.Jp2 },
+ { ".jpeg", ImagePartType.Jpeg },
+ { ".jpg", ImagePartType.Jpeg },
+ { ".jpe", ImagePartType.Jpeg },
+ { ".pcx", ImagePartType.Pcx },
+ { ".png", ImagePartType.Png },
+ { ".svg", ImagePartType.Svg },
+ { ".tif", ImagePartType.Tif },
+ { ".tiff", ImagePartType.Tiff },
+ { ".wmf", ImagePartType.Wmf }
+ };
+ private readonly T hostingPart;
+ private readonly IWebRequest resourceLoader;
+ private readonly HtmlImageInfoCollection prefetchedImages;
+
+
+ /// <summary>
+ /// Constructor.
+ /// </summary>
+ /// <param name="hostingPart">The image will be linked to that hosting part.
+ /// Images are not shared between header, footer and body.</param>
+ /// <param name="resourceLoader">Service to resolve an image.</param>
+ public ImagePrefetcher(T hostingPart, IWebRequest resourceLoader)
{
- // Map extension to PartTypeInfo
- private static readonly Dictionary knownExtensions = new(StringComparer.OrdinalIgnoreCase) {
- { ".gif", ImagePartType.Gif },
- { ".bmp", ImagePartType.Bmp },
- { ".emf", ImagePartType.Emf },
- { ".ico", ImagePartType.Icon },
- { ".jp2", ImagePartType.Jp2 },
- { ".jpeg", ImagePartType.Jpeg },
- { ".jpg", ImagePartType.Jpeg },
- { ".jpe", ImagePartType.Jpeg },
- { ".pcx", ImagePartType.Pcx },
- { ".png", ImagePartType.Png },
- { ".svg", ImagePartType.Svg },
- { ".tif", ImagePartType.Tif },
- { ".tiff", ImagePartType.Tiff },
- { ".wmf", ImagePartType.Wmf }
- };
- private readonly MainDocumentPart mainPart;
- private readonly IWebRequest resourceLoader;
- private readonly HtmlImageInfoCollection prefetchedImages;
-
-
- public ImagePrefetcher(MainDocumentPart mainPart, IWebRequest resourceLoader)
- {
- this.mainPart = mainPart;
- this.resourceLoader = resourceLoader;
- this.prefetchedImages = new HtmlImageInfoCollection();
- }
+ this.hostingPart = hostingPart;
+ this.resourceLoader = resourceLoader;
+ this.prefetchedImages = new HtmlImageInfoCollection();
+ }
+
+ //____________________________________________________________________
+ //
+ // Public Functionality
- //____________________________________________________________________
- //
- // Public Functionality
+ /// <summary>
+ /// Download the remote or local image located at the specified url.
+ /// </summary>
+ public async Task Download(string imageUri, CancellationToken cancellationToken)
+ {
+ if (prefetchedImages.Contains(imageUri))
+ return prefetchedImages[imageUri];
- ///
- /// Download the remote or local image located at the specified url.
- ///
- public async Task Download(string imageUri, CancellationToken cancellationToken)
+ HtmlImageInfo? iinfo;
+ if (DataUri.IsWellFormed(imageUri)) // data inline, encoded in base64
{
- if (prefetchedImages.Contains(imageUri))
- return prefetchedImages[imageUri];
+ iinfo = ReadDataUri(imageUri);
+ }
+ else
+ {
+ iinfo = await DownloadRemoteImage(imageUri, cancellationToken);
+ }
- HtmlImageInfo? iinfo;
- if (DataUri.IsWellFormed(imageUri)) // data inline, encoded in base64
- {
- iinfo = ReadDataUri(imageUri);
- }
- else
- {
- iinfo = await DownloadRemoteImage(imageUri, cancellationToken);
- }
+ if (iinfo != null)
+ prefetchedImages.Add(iinfo);
- if (iinfo != null)
- prefetchedImages.Add(iinfo);
+ return iinfo;
+ }
- return iinfo;
- }
+ /// <summary>
+ /// Download the image and try to find its format type.
+ /// </summary>
+ private async Task DownloadRemoteImage(string src, CancellationToken cancellationToken)
+ {
+ Uri imageUri = new Uri(src, UriKind.RelativeOrAbsolute);
+ if (imageUri.IsAbsoluteUri && !resourceLoader.SupportsProtocol(imageUri.Scheme))
+ return null;
- ///
- /// Download the image and try to find its format type.
- ///
- private async Task DownloadRemoteImage(string src, CancellationToken cancellationToken)
- {
- Uri imageUri = new Uri(src, UriKind.RelativeOrAbsolute);
- if (imageUri.IsAbsoluteUri && !resourceLoader.SupportsProtocol(imageUri.Scheme))
- return null;
+ Resource? response;
- Resource? response;
+ response = await resourceLoader.FetchAsync(imageUri, cancellationToken).ConfigureAwait(false);
+ if (response?.Content == null)
+ return null;
- response = await resourceLoader.FetchAsync(imageUri, cancellationToken).ConfigureAwait(false);
- if (response?.Content == null)
+ using (response)
+ {
+ // For requested url with no filename, we need to read the media mime type if provided
+ response.Headers.TryGetValue("Content-Type", out var mime);
+ if (!TryInspectMimeType(mime, out PartTypeInfo type)
+ && !TryGuessTypeFromUri(imageUri, out type)
+ && !TryGuessTypeFromStream(response.Content, out type))
+ {
return null;
+ }
- HtmlImageInfo info = new HtmlImageInfo(src);
- using (response)
+ var ipart = hostingPart.AddImagePart(type);
+ Size originalSize;
+ using (var outputStream = ipart.GetStream(FileMode.Create))
{
- // For requested url with no filename, we need to read the media mime type if provided
- response.Headers.TryGetValue("Content-Type", out var mime);
- if (!TryInspectMimeType(mime, out PartTypeInfo type)
- && !TryGuessTypeFromUri(imageUri, out type)
- && !TryGuessTypeFromStream(response.Content, out type))
- {
- return null;
- }
-
- var ipart = mainPart.AddImagePart(type);
- using (var outputStream = ipart.GetStream(FileMode.Create))
- {
- response.Content.CopyTo(outputStream);
-
- outputStream.Seek(0L, SeekOrigin.Begin);
- info.Size = GetImageSize(outputStream);
- }
-
- info.ImagePartId = mainPart.GetIdOfPart(ipart);
- return info;
+ response.Content.CopyTo(outputStream);
+
+ outputStream.Seek(0L, SeekOrigin.Begin);
+ originalSize = GetImageSize(outputStream);
}
+
+ return new HtmlImageInfo(src, hostingPart.GetIdOfPart(ipart)) {
+ TypeInfo = type,
+ Size = originalSize
+ };
}
+ }
- ///
- /// Parse the Data inline image.
- ///
- private HtmlImageInfo? ReadDataUri(string src)
+ /// <summary>
+ /// Parse the Data inline image.
+ /// </summary>
+ private HtmlImageInfo? ReadDataUri(string src)
+ {
+ if (DataUri.TryCreate(src, out var dataUri))
{
- if (DataUri.TryCreate(src, out var dataUri))
+ Size originalSize;
+ knownContentType.TryGetValue(dataUri!.Mime, out PartTypeInfo type);
+ var ipart = hostingPart.AddImagePart(type);
+ using (var outputStream = ipart.GetStream(FileMode.Create))
{
- Size size;
- knownContentType.TryGetValue(dataUri!.Mime, out PartTypeInfo type);
- var ipart = mainPart.AddImagePart(type);
- using (var outputStream = ipart.GetStream(FileMode.Create))
- {
- outputStream.Write(dataUri.Data, 0, dataUri.Data.Length);
-
- outputStream.Seek(0L, SeekOrigin.Begin);
- size = GetImageSize(outputStream);
- }
-
- return new HtmlImageInfo(src) {
- ImagePartId = mainPart.GetIdOfPart(ipart),
- Size = size
- };
+ outputStream.Write(dataUri.Data, 0, dataUri.Data.Length);
+
+ outputStream.Seek(0L, SeekOrigin.Begin);
+ originalSize = GetImageSize(outputStream);
}
- return null;
+ return new HtmlImageInfo(src, hostingPart.GetIdOfPart(ipart)) {
+ TypeInfo = type,
+ Size = originalSize
+ };
}
- //____________________________________________________________________
- //
- // Private Implementation
-
- // http://stackoverflow.com/questions/58510/using-net-how-can-you-find-the-mime-type-of-a-file-based-on-the-file-signature
- private static readonly Dictionary knownContentType = new(StringComparer.OrdinalIgnoreCase) {
- { "image/gif", ImagePartType.Gif },
- { "image/pjpeg", ImagePartType.Jpeg },
- { "image/jp2", ImagePartType.Jp2 },
- { "image/jpg", ImagePartType.Jpeg },
- { "image/jpeg", ImagePartType.Jpeg },
- { "image/x-png", ImagePartType.Png },
- { "image/png", ImagePartType.Png },
- { "image/tiff", ImagePartType.Tiff },
- { "image/vnd.microsoft.icon", ImagePartType.Icon },
- // these icons mime type are wrong but we should nevertheless take care (http://en.wikipedia.org/wiki/ICO_%28file_format%29#MIME_type)
- { "image/x-icon", ImagePartType.Icon },
- { "image/icon", ImagePartType.Icon },
- { "image/ico", ImagePartType.Icon },
- { "text/ico", ImagePartType.Icon },
- { "text/application-ico", ImagePartType.Icon },
- { "image/bmp", ImagePartType.Bmp },
- { "image/svg+xml", ImagePartType.Svg },
- };
-
- ///
- /// Inspect the response headers of a web request and decode the mime type if provided
- ///
- /// Returns the extension of the image if provideds.
- private static bool TryInspectMimeType(string? contentType, out PartTypeInfo type)
- {
- // can be null when the protocol used doesn't allow response headers
- if (contentType != null &&
- knownContentType.TryGetValue(contentType, out type))
- return true;
+ return null;
+ }
- type = default;
- return false;
- }
+ //____________________________________________________________________
+ //
+ // Private Implementation
- ///
- /// Gets the OpenXml PartTypeInfo associated to an image.
- ///
- private static bool TryGuessTypeFromUri(Uri uri, out PartTypeInfo type)
- {
- string extension = Path.GetExtension(uri.IsAbsoluteUri ? uri.Segments[uri.Segments.Length - 1] : uri.OriginalString);
- if (knownExtensions.TryGetValue(extension, out type)) return true;
+ // http://stackoverflow.com/questions/58510/using-net-how-can-you-find-the-mime-type-of-a-file-based-on-the-file-signature
+ private static readonly Dictionary knownContentType = new(StringComparer.OrdinalIgnoreCase) {
+ { "image/gif", ImagePartType.Gif },
+ { "image/pjpeg", ImagePartType.Jpeg },
+ { "image/jp2", ImagePartType.Jp2 },
+ { "image/jpg", ImagePartType.Jpeg },
+ { "image/jpeg", ImagePartType.Jpeg },
+ { "image/x-png", ImagePartType.Png },
+ { "image/png", ImagePartType.Png },
+ { "image/tiff", ImagePartType.Tiff },
+ { "image/vnd.microsoft.icon", ImagePartType.Icon },
+ // these icons mime type are wrong but we should nevertheless take care (http://en.wikipedia.org/wiki/ICO_%28file_format%29#MIME_type)
+ { "image/x-icon", ImagePartType.Icon },
+ { "image/icon", ImagePartType.Icon },
+ { "image/ico", ImagePartType.Icon },
+ { "text/ico", ImagePartType.Icon },
+ { "text/application-ico", ImagePartType.Icon },
+ { "image/bmp", ImagePartType.Bmp },
+ { "image/svg+xml", ImagePartType.Svg },
+ };
- // extension not recognized, try with checking the query string. Expecting to resolve something like:
- // ./image.axd?picture=img1.jpg
- extension = Path.GetExtension(uri.IsAbsoluteUri ? uri.AbsoluteUri : uri.ToString());
- if (knownExtensions.TryGetValue(extension, out type)) return true;
+ /// <summary>
+ /// Inspect the response headers of a web request and decode the mime type if provided.
+ /// </summary>
+ /// <returns>Returns true when the content type is provided and maps to a known image part type.</returns>
+ private static bool TryInspectMimeType(string? contentType, out PartTypeInfo type)
+ {
+ // can be null when the protocol used doesn't allow response headers
+ if (contentType != null &&
+ knownContentType.TryGetValue(contentType, out type))
+ return true;
- return false;
- }
+ type = default;
+ return false;
+ }
- ///
- /// Gets the OpenXml PartTypeInfo associated to an image.
- ///
- private static bool TryGuessTypeFromStream(Stream stream, out PartTypeInfo type)
+ /// <summary>
+ /// Gets the OpenXml PartTypeInfo associated to an image, guessed from the extension found in its uri.
+ /// </summary>
+ private static bool TryGuessTypeFromUri(Uri uri, out PartTypeInfo type)
+ {
+ string extension = Path.GetExtension(uri.IsAbsoluteUri ? uri.Segments[uri.Segments.Length - 1] : uri.OriginalString);
+ if (knownExtensions.TryGetValue(extension, out type)) return true;
+
+ // extension not recognized, try with checking the query string. Expecting to resolve something like:
+ // ./image.axd?picture=img1.jpg
+ extension = Path.GetExtension(uri.IsAbsoluteUri ? uri.AbsoluteUri : uri.ToString());
+ if (knownExtensions.TryGetValue(extension, out type)) return true;
+
+ return false;
+ }
+
+ /// <summary>
+ /// Gets the OpenXml PartTypeInfo associated to an image, guessed from the file type detected in its stream.
+ /// </summary>
+ private static bool TryGuessTypeFromStream(Stream stream, out PartTypeInfo type)
+ {
+ if (ImageHeader.TryDetectFileType(stream, out ImageHeader.FileType guessType))
{
- if (ImageHeader.TryDetectFileType(stream, out ImageHeader.FileType guessType))
+ switch (guessType)
{
- switch (guessType)
- {
- case ImageHeader.FileType.Bitmap: type = ImagePartType.Bmp; return true;
- case ImageHeader.FileType.Emf: type = ImagePartType.Emf; return true;
- case ImageHeader.FileType.Gif: type = ImagePartType.Gif; return true;
- case ImageHeader.FileType.Jpeg: type = ImagePartType.Jpeg; return true;
- case ImageHeader.FileType.Png: type = ImagePartType.Png; return true;
- }
+ case ImageHeader.FileType.Bitmap: type = ImagePartType.Bmp; return true;
+ case ImageHeader.FileType.Emf: type = ImagePartType.Emf; return true;
+ case ImageHeader.FileType.Gif: type = ImagePartType.Gif; return true;
+ case ImageHeader.FileType.Jpeg: type = ImagePartType.Jpeg; return true;
+ case ImageHeader.FileType.Png: type = ImagePartType.Png; return true;
}
- type = ImagePartType.Bmp;
- return false;
}
+ type = ImagePartType.Bmp;
+ return false;
+ }
- ///
- /// Loads an image from a stream and grab its size.
- ///
- private static Size GetImageSize(Stream imageStream)
+ /// <summary>
+ /// Loads an image from a stream and grab its size.
+ /// </summary>
+ private static Size GetImageSize(Stream imageStream)
+ {
+ // Read only the size of the image
+ try
{
- // Read only the size of the image
- try
- {
- return ImageHeader.GetDimensions(imageStream);
- }
- catch (ArgumentException)
- {
- return Size.Empty;
- }
+ return ImageHeader.GetDimensions(imageStream);
+ }
+ catch (ArgumentException)
+ {
+ return Size.Empty;
}
}
-}
\ No newline at end of file
+}
diff --git a/src/Html2OpenXml/ParsingContext.cs b/src/Html2OpenXml/ParsingContext.cs
index a26bc9f4..18dd7657 100644
--- a/src/Html2OpenXml/ParsingContext.cs
+++ b/src/Html2OpenXml/ParsingContext.cs
@@ -20,14 +20,19 @@ namespace HtmlToOpenXml;
/// Contains information that is global to the parsing.
///
/// The list of paragraphs that will be returned.
-sealed class ParsingContext(HtmlConverter converter, MainDocumentPart mainPart)
+sealed class ParsingContext(HtmlConverter converter, OpenXmlPartContainer hostingPart, IO.IImageLoader imageLoader)
{
/// Shorthand for .HtmlStyles
public WordDocumentStyle DocumentStyle { get => Converter.HtmlStyles; }
public HtmlConverter Converter { get; } = converter;
- public MainDocumentPart MainPart { get; } = mainPart;
+ public MainDocumentPart MainPart { get; } = converter.MainPart;
+
+ public OpenXmlPartContainer HostingPart { get; } = hostingPart;
+
+ public IO.IImageLoader ImageLoader { get; } = imageLoader;
+
private HtmlElementExpression? parentExpression;
private ParsingContext? parentContext;
@@ -49,7 +54,7 @@ public void CascadeStyles (OpenXmlElement element)
public ParsingContext CreateChild(HtmlElementExpression expression)
{
- var childContext = new ParsingContext(Converter, MainPart) {
+ var childContext = new ParsingContext(Converter, HostingPart, ImageLoader) {
propertyBag = propertyBag,
parentExpression = expression,
parentContext = this
diff --git a/src/Html2OpenXml/PredefinedStyles.cs b/src/Html2OpenXml/PredefinedStyles.cs
index 4e0d5356..a7cf2a5a 100755
--- a/src/Html2OpenXml/PredefinedStyles.cs
+++ b/src/Html2OpenXml/PredefinedStyles.cs
@@ -22,6 +22,9 @@ internal class PredefinedStyles
public const string Quote = "Quote";
public const string QuoteChar = "QuoteChar";
public const string TableGrid = "TableGrid";
+ public const string Header = "Header";
+ public const string Footer = "Footer";
+
///
diff --git a/src/Html2OpenXml/Primitives/DefaultStyles.cs b/src/Html2OpenXml/Primitives/DefaultStyles.cs
index ca57ac29..44d2a1ce 100644
--- a/src/Html2OpenXml/Primitives/DefaultStyles.cs
+++ b/src/Html2OpenXml/Primitives/DefaultStyles.cs
@@ -67,7 +67,7 @@ public class DefaultStyles
public string ListParagraphStyle { get; set; } = PredefinedStyles.ListParagraph;
///
- /// Default style for the <pre> table
+ /// Default style for the pre table
///
/// TableGrid
public string PreTableStyle { get; set; } = PredefinedStyles.TableGrid;
@@ -89,4 +89,16 @@ public class DefaultStyles
///
/// TableGrid
public string TableStyle { get; set; } = PredefinedStyles.TableGrid;
+
+ /// <summary>
+ /// Default style for header paragraphs.
+ /// </summary>
+ /// <value>Header</value>
+ public string HeaderStyle { get; set; } = PredefinedStyles.Header;
+
+ /// <summary>
+ /// Default style for footer paragraphs.
+ /// </summary>
+ /// <value>Footer</value>
+ public string FooterStyle { get; set; } = PredefinedStyles.Footer;
}
\ No newline at end of file
diff --git a/src/Html2OpenXml/Primitives/HtmlImageInfo.cs b/src/Html2OpenXml/Primitives/HtmlImageInfo.cs
index 7ecda1f6..4ffca088 100755
--- a/src/Html2OpenXml/Primitives/HtmlImageInfo.cs
+++ b/src/Html2OpenXml/Primitives/HtmlImageInfo.cs
@@ -16,7 +16,7 @@ namespace HtmlToOpenXml;
///
/// Represents an image and its metadata.
///
-sealed class HtmlImageInfo(string source)
+sealed class HtmlImageInfo(string source, string partId)
{
///
/// The URI identifying this cached image information.
@@ -26,12 +26,17 @@ sealed class HtmlImageInfo(string source)
///
/// The Unique identifier of the ImagePart in the .
///
- public string? ImagePartId { get; set; }
+ public string ImagePartId { get; set; } = partId;
///
- /// Gets or sets the size of the image
+ /// Gets or sets the original size of the image.
///
public Size Size { get; set; }
+
+ /// <summary>
+ /// Gets or sets the content type of the image.
+ /// </summary>
+ public PartTypeInfo TypeInfo { get; set; }
}
///
diff --git a/src/Html2OpenXml/Primitives/Unit.cs b/src/Html2OpenXml/Primitives/Unit.cs
index e7370c1b..938df509 100755
--- a/src/Html2OpenXml/Primitives/Unit.cs
+++ b/src/Html2OpenXml/Primitives/Unit.cs
@@ -111,7 +111,7 @@ private static long ComputeInEmus(UnitMetric type, double value)
case UnitMetric.Emus: return (long) value;
case UnitMetric.Inch: return (long) (value * 914400L);
case UnitMetric.Centimeter: return (long) (value * 360000L);
- case UnitMetric.Millimeter: return (long) (value * 3600000L);
+ case UnitMetric.Millimeter: return (long) (value * 36000L);
case UnitMetric.EM:
// well this is a rough conversion but considering 1em = 12pt (http://sureshjain.wordpress.com/2007/07/06/53/)
return (long) (value / 72 * 914400L * 12);
diff --git a/src/Html2OpenXml/Utilities/AngleSharpExtensions.cs b/src/Html2OpenXml/Utilities/AngleSharpExtensions.cs
index b7c38b90..3d896241 100644
--- a/src/Html2OpenXml/Utilities/AngleSharpExtensions.cs
+++ b/src/Html2OpenXml/Utilities/AngleSharpExtensions.cs
@@ -39,6 +39,17 @@ public static HtmlAttributeCollection GetStyles(this IElement element)
return null;
}
+ /// <summary>
+ /// Gets whether the anchor redirects to the `top` of the document.
+ /// </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static bool IsTopAnchor(this IHtmlAnchorElement element)
+ {
+ if (element.Hash.Length <= 1) return false;
+ return "#top".Equals(element.Hash, StringComparison.OrdinalIgnoreCase)
+ || "#_top".Equals(element.Hash, StringComparison.OrdinalIgnoreCase);
+ }
+
///
/// Gets whether the given child is preceded by any list element (ol or ul ).
///
diff --git a/src/Html2OpenXml/Utilities/OpenXmlExtensions.cs b/src/Html2OpenXml/Utilities/OpenXmlExtensions.cs
index e79da7c5..5242f49e 100755
--- a/src/Html2OpenXml/Utilities/OpenXmlExtensions.cs
+++ b/src/Html2OpenXml/Utilities/OpenXmlExtensions.cs
@@ -42,15 +42,6 @@ public static bool HasChild(this OpenXmlElement element) where T : OpenXmlEle
return null;
}
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static void InsertInDocProperties(this Drawing d, params OpenXmlElement[] newChildren)
- {
- d.Inline ??= new Inline();
- if (d.Inline.DocProperties == null) d.Inline.DocProperties = new DocProperties();
-
- d.Inline.DocProperties.Append(newChildren);
- }
-
public static bool Compare(this PageSize pageSize, PageOrientationValues orientation)
{
PageOrientationValues pageOrientation;
diff --git a/test/HtmlToOpenXml.Tests/AbbrTests.cs b/test/HtmlToOpenXml.Tests/AbbrTests.cs
index f3063ba0..2f65ad1f 100644
--- a/test/HtmlToOpenXml.Tests/AbbrTests.cs
+++ b/test/HtmlToOpenXml.Tests/AbbrTests.cs
@@ -101,7 +101,7 @@ public void Empty_ShouldBeIgnored()
Assert.That(elements, Is.Empty);
}
- [TestCase("Placeholder ")]
+ [TestCase("Placeholder ")]
[TestCase("Placeholder ")]
[TestCase("Placeholder ")]
public void WithNoDescription_ReturnsSimpleParagraph(string html)
@@ -110,6 +110,8 @@ public void WithNoDescription_ReturnsSimpleParagraph(string html)
var elements = converter.Parse(html);
Assert.That(elements, Has.Count.EqualTo(1));
Assert.That(elements, Is.All.TypeOf());
+ Assert.That(mainPart.FootnotesPart, Is.Null);
+ Assert.That(mainPart.EndnotesPart, Is.Null);
}
[TestCase("HTML ", AcronymPosition.DocumentEnd, Description = "Read existing endnotes references")]
@@ -177,5 +179,41 @@ public void InsideParagraph_ReturnsMultipleRuns()
Assert.That(elements[0].Elements().Any(r => r.HasChild()), Is.True);
});
}
+
+ [TestCase("NASA ")]
+ [TestCase("NASA ")]
+ public async Task ParseIntoHeader_ReturnsSimpleParagraph(string html)
+ {
+ await converter.ParseHeader(html);
+ var header = mainPart.HeaderParts?.FirstOrDefault()?.Header;
+ Assert.That(header, Is.Not.Null);
+ Assert.That(header.ChildElements, Has.Count.EqualTo(1));
+ Assert.Multiple(() =>
+ {
+ Assert.That(header.ChildElements, Is.All.TypeOf());
+ Assert.That(header.FirstChild!.InnerText, Is.EqualTo("NASA"));
+ Assert.That(mainPart.FootnotesPart, Is.Null);
+ Assert.That(mainPart.EndnotesPart, Is.Null);
+ AssertThatOpenXmlDocumentIsValid();
+ });
+ }
+
+ [TestCase("NASA ")]
+ [TestCase("NASA ")]
+ public async Task ParseIntoFooter_ShouldBeIgnored(string html)
+ {
+ await converter.ParseFooter(html);
+ var footer = mainPart.FooterParts?.FirstOrDefault()?.Footer;
+ Assert.That(footer, Is.Not.Null);
+ Assert.That(footer.ChildElements, Has.Count.EqualTo(1));
+ Assert.Multiple(() =>
+ {
+ Assert.That(footer.ChildElements, Is.All.TypeOf());
+ Assert.That(footer.FirstChild!.InnerText, Is.EqualTo("NASA"));
+ Assert.That(mainPart.FootnotesPart, Is.Null);
+ Assert.That(mainPart.EndnotesPart, Is.Null);
+ AssertThatOpenXmlDocumentIsValid();
+ });
+ }
}
}
\ No newline at end of file
diff --git a/test/HtmlToOpenXml.Tests/HeaderFooterTests.cs b/test/HtmlToOpenXml.Tests/HeaderFooterTests.cs
new file mode 100644
index 00000000..1f399e6b
--- /dev/null
+++ b/test/HtmlToOpenXml.Tests/HeaderFooterTests.cs
@@ -0,0 +1,113 @@
+using NUnit.Framework;
+using DocumentFormat.OpenXml.Wordprocessing;
+using DocumentFormat.OpenXml.Packaging;
+
+namespace HtmlToOpenXml.Tests
+{
+ /// <summary>
+ /// Tests on ParseHeader and ParseFooter methods.
+ /// </summary>
+ [TestFixture]
+ public class HeaderFooterTests : HtmlConverterTestBase
+ {
+ [Test]
+ public async Task Header_ReturnsHeaderPartLinkedToBody()
+ {
+ await converter.ParseHeader("Header content
", HeaderFooterValues.First);
+
+ var headerPart = mainPart.HeaderParts?.FirstOrDefault();
+ Assert.That(headerPart, Is.Not.Null);
+ Assert.That(headerPart.Header, Is.Not.Null);
+ var p = headerPart.Header.Elements();
+ Assert.That(p, Is.Not.Empty);
+ Assert.That(p.Select(p => p.ParagraphProperties?.ParagraphStyleId?.Val?.Value),
+ Has.All.EqualTo(converter.HtmlStyles.DefaultStyles.HeaderStyle));
+
+ var sectionProperties = mainPart.Document.Body!.Elements();
+ Assert.That(sectionProperties, Is.Not.Empty);
+ Assert.That(sectionProperties.SelectMany(s => s.Elements())
+ .Any(r => r.Type?.Value == HeaderFooterValues.First), Is.True);
+ AssertThatOpenXmlDocumentIsValid();
+ }
+
+ [Test]
+ public async Task Footer_ReturnsFooterPartLinkedToBody()
+ {
+ await converter.ParseFooter("Footer content
");
+
+ var footerPart = mainPart.FooterParts?.FirstOrDefault();
+ Assert.That(footerPart, Is.Not.Null);
+ Assert.That(footerPart.Footer, Is.Not.Null);
+
+ var sectionProperties = mainPart.Document.Body!.Elements();
+ Assert.That(sectionProperties, Is.Not.Empty);
+ Assert.That(sectionProperties.Any(s => s.HasChild()), Is.True);
+ AssertThatOpenXmlDocumentIsValid();
+ }
+
+ [Test(Description = "Overwrite existing Default header")]
+ public async Task WithExistingHeader_Default_ReturnsOverridenHeaderPart()
+ {
+ using var generatedDocument = new MemoryStream();
+ using (var buffer = ResourceHelper.GetStream("Resources.DocWithImgHeaderFooter.docx"))
+ buffer.CopyTo(generatedDocument);
+
+ generatedDocument.Position = 0L;
+ using WordprocessingDocument package = WordprocessingDocument.Open(generatedDocument, true);
+ MainDocumentPart mainPart = package.MainDocumentPart!;
+
+ var sectionProperties = mainPart.Document.Body!.Elements();
+ Assert.That(sectionProperties, Is.Not.Empty);
+ var headerRefs = sectionProperties.SelectMany(s => s.Elements());
+ Assert.Multiple(() =>
+ {
+ Assert.That(headerRefs.Count(), Is.EqualTo(1));
+ Assert.That(headerRefs.Count(r => r.Type?.Value == HeaderFooterValues.Default), Is.EqualTo(1), "Default header exist");
+ });
+
+ HtmlConverter converter = new(mainPart);
+ await converter.ParseHeader("Header content");
+
+ sectionProperties = mainPart.Document.Body!.Elements();
+ Assert.That(sectionProperties, Is.Not.Empty);
+ Assert.That(sectionProperties.SelectMany(s => s.Elements())
+ .Count(r => r.Type?.Value == HeaderFooterValues.Default), Is.EqualTo(1));
+ AssertThatOpenXmlDocumentIsValid();
+ }
+
+ [Test(Description = "Create additional header for even pages")]
+ public async Task WithExistingHeader_Even_ReturnsAnotherHeaderPart()
+ {
+ using var generatedDocument = new MemoryStream();
+ using (var buffer = ResourceHelper.GetStream("Resources.DocWithImgHeaderFooter.docx"))
+ buffer.CopyTo(generatedDocument);
+
+ generatedDocument.Position = 0L;
+ using WordprocessingDocument package = WordprocessingDocument.Open(generatedDocument, true);
+ MainDocumentPart mainPart = package.MainDocumentPart!;
+
+ var sectionProperties = mainPart.Document.Body!.Elements();
+ Assert.That(sectionProperties, Is.Not.Empty);
+ var headerRefs = sectionProperties.SelectMany(s => s.Elements());
+ Assert.Multiple(() =>
+ {
+ Assert.That(headerRefs.Count(r => r.Type?.Value == HeaderFooterValues.Default), Is.EqualTo(1), "Default header exist");
+ Assert.That(headerRefs.Count(r => r.Type?.Value == HeaderFooterValues.Even), Is.Zero, "No event header has been yet defined");
+ });
+
+ HtmlConverter converter = new(mainPart);
+ await converter.ParseHeader("Header even content", HeaderFooterValues.Even);
+
+ sectionProperties = mainPart.Document.Body!.Elements();
+ Assert.That(sectionProperties, Is.Not.Empty);
+ Assert.That(sectionProperties.Count(s => s.HasChild()), Is.EqualTo(1));
+ headerRefs = sectionProperties.SelectMany(s => s.Elements());
+ Assert.Multiple(() =>
+ {
+ Assert.That(headerRefs.Count(r => r.Type?.Value == HeaderFooterValues.Default), Is.EqualTo(1));
+ Assert.That(headerRefs.Count(r => r.Type?.Value == HeaderFooterValues.Even), Is.EqualTo(1));
+ });
+ AssertThatOpenXmlDocumentIsValid();
+ }
+ }
+}
\ No newline at end of file
diff --git a/test/HtmlToOpenXml.Tests/HrTests.cs b/test/HtmlToOpenXml.Tests/HrTests.cs
index 8df6af9c..b7fdc1d5 100644
--- a/test/HtmlToOpenXml.Tests/HrTests.cs
+++ b/test/HtmlToOpenXml.Tests/HrTests.cs
@@ -17,13 +17,25 @@ public void Standalone_ReturnsWithNoSpacing ()
AssertIsHr(elements[0], false);
}
- [Test(Description = "should not generate a particular spacing because border-bottom is empty")]
+ [Test(Description = "Should not generate a particular spacing because border-bottom is empty")]
public void AfterBorderlessContent_ReturnsWithNoSpacing ()
{
var elements = converter.Parse("Before
");
AssertIsHr(elements[1], false);
}
+ [Test(Description = "User can provide his own stylised horizontal separator")]
+ public void Bordered_ReturnsWithStylisedBorder ()
+ { // Checks the top border of the first parsed element: dotted, colour FF0000, size 2.
+ var elements = converter.Parse(" "); // NOTE(review): the HTML literal looks stripped in this patch text (presumably a styled <hr>) — confirm
+ AssertIsHr(elements[0], false); // same `false` flag as the ReturnsWithNoSpacing tests of this fixture
+ var borders = elements[0].GetFirstChild()?.ParagraphBorders; // NOTE(review): generic argument of GetFirstChild looks stripped — confirm
+ Assert.That(borders, Is.Not.Null);
+ Assert.That(borders.TopBorder?.Val?.Value, Is.EqualTo(BorderValues.Dotted));
+ Assert.That(borders.TopBorder?.Color?.Value, Is.EqualTo("FF0000"));
+ Assert.That(borders.TopBorder?.Size?.Value, Is.EqualTo(2));
+ }
+
[TestCase("Before
")]
[TestCase("Before
")]
[TestCase(" ")]
diff --git a/test/HtmlToOpenXml.Tests/HtmlConverterTestBase.cs b/test/HtmlToOpenXml.Tests/HtmlConverterTestBase.cs
index ca50f928..3e056798 100644
--- a/test/HtmlToOpenXml.Tests/HtmlConverterTestBase.cs
+++ b/test/HtmlToOpenXml.Tests/HtmlConverterTestBase.cs
@@ -1,5 +1,6 @@
using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Packaging;
+using DocumentFormat.OpenXml.Validation;
using DocumentFormat.OpenXml.Wordprocessing;
using NUnit.Framework;
@@ -36,5 +37,21 @@ public void Close ()
package?.Dispose();
generatedDocument?.Dispose();
}
+
+ /// <summary>Validates the fixture's <c>package</c> with an Office 2021 validator, logs every error, then fails the test if any was reported.</summary>
+ protected void AssertThatOpenXmlDocumentIsValid()
+ {
+ var validator = new OpenXmlValidator(FileFormatVersions.Office2021);
+ bool hasErrors = false;
+ // Single pass: enumerating the result twice (probe + loop) left an enumerator undisposed and may re-run a lazily evaluated validation.
+ foreach (ValidationErrorInfo error in validator.Validate(package))
+ {
+ hasErrors = true;
+ TestContext.Error.Write("{0}\n\t{1}\n", error.Path?.XPath, error.Description);
+ }
+
+ if (hasErrors)
+ Assert.Fail("The document isn't conformant with Office 2021");
+ }
}
}
\ No newline at end of file
diff --git a/test/HtmlToOpenXml.Tests/HtmlToOpenXml.Tests.csproj b/test/HtmlToOpenXml.Tests/HtmlToOpenXml.Tests.csproj
index d612ff3c..e781a424 100755
--- a/test/HtmlToOpenXml.Tests/HtmlToOpenXml.Tests.csproj
+++ b/test/HtmlToOpenXml.Tests/HtmlToOpenXml.Tests.csproj
@@ -16,11 +16,11 @@
runtime; build; native; contentfiles; analyzers; buildtransitive
all
-
-
-
+
+
+
-
+
all
runtime; build; native; contentfiles; analyzers
diff --git a/test/HtmlToOpenXml.Tests/ImageFormats/ImageHeaderTests.cs b/test/HtmlToOpenXml.Tests/ImageFormats/ImageHeaderTests.cs
index 8f8a14f6..8dabcd50 100644
--- a/test/HtmlToOpenXml.Tests/ImageFormats/ImageHeaderTests.cs
+++ b/test/HtmlToOpenXml.Tests/ImageFormats/ImageHeaderTests.cs
@@ -28,6 +28,7 @@ public void GuessFormat_ReturnsImageSize((string resourceName, Size expectedSize
yield return ("Resources.html2openxml.emf", new Size(100, 100));
// animated gif:
yield return ("Resources.stan.gif", new Size(252, 318));
+ yield return ("Resources.kiwi.svg", new Size(612, 502));
}
///
@@ -53,6 +54,7 @@ public void PngSof2_ReturnsImageSize()
[TestCase("Resources.html2openxml.gif", ExpectedResult = ImageHeader.FileType.Gif)]
[TestCase("Resources.html2openxml.jpg", ExpectedResult = ImageHeader.FileType.Jpeg)]
[TestCase("Resources.html2openxml.png", ExpectedResult = ImageHeader.FileType.Png)]
+ [TestCase("Resources.kiwi.svg", ExpectedResult = ImageHeader.FileType.Xml)]
public ImageHeader.FileType GuessFormat_ReturnsFileType(string resourceName)
{
using var imageStream = ResourceHelper.GetStream(resourceName);
diff --git a/test/HtmlToOpenXml.Tests/ImgTests.cs b/test/HtmlToOpenXml.Tests/ImgTests.cs
index 72a23534..64d97ae6 100644
--- a/test/HtmlToOpenXml.Tests/ImgTests.cs
+++ b/test/HtmlToOpenXml.Tests/ImgTests.cs
@@ -15,12 +15,14 @@ namespace HtmlToOpenXml.Tests
[TestFixture]
public class ImgTests : HtmlConverterTestBase
{
- [Test]
- public void AbsoluteUri_ReturnsDrawing_WithDownloadedData()
+ [TestCase("https://www.w3schools.com/tags/smiley.gif", "image/gif")]
+ [TestCase("https://dev.w3.org/SVG/tools/svgweb/samples/svg-files/helloworld.svg", "image/svg+xml")]
+ public void AbsoluteUri_ReturnsDrawing_WithDownloadedData(string imageUri, string contentType) // each case pairs an image URI with the content type expected on the image part; NOTE(review): remote URLs, presumably needs network access — confirm
{
- var elements = converter.Parse(@" ");
+ var elements = converter.Parse(@$" "); // NOTE(review): the interpolated HTML looks stripped in this patch text (presumably an <img> using imageUri) — confirm
Assert.That(elements, Has.Count.EqualTo(1));
- AssertIsImg(elements[0]);
+ var (_, imagePart) = AssertIsImg(mainPart, elements[0]); // only the image part is needed here; the drawing is discarded
+ Assert.That(imagePart.ContentType, Is.EqualTo(contentType));
}
[Test]
@@ -28,14 +30,14 @@ public void DataUri_ReturnsDrawing_WithDecryptedData()
{
var elements = converter.Parse(@" ");
Assert.That(elements, Has.Count.EqualTo(1));
- AssertIsImg(elements[0]);
+ AssertIsImg(mainPart, elements[0]);
}
[Test]
public void WithBorder_ReturnsRunWithBorder()
{
var elements = converter.Parse(@" ");
- AssertIsImg(elements[0]);
+ AssertIsImg(mainPart, elements[0]);
var run = elements[0].GetFirstChild();
var runProperties = run?.GetFirstChild();
Assert.That(runProperties, Is.Not.Null);
@@ -55,11 +57,11 @@ public void ManualProvisioning_ReturnsDrawing_WithProvidedData()
var elements = converter.Parse(@" ");
Assert.That(elements, Has.Count.EqualTo(1));
- AssertIsImg(elements[0]);
+ AssertIsImg(mainPart, elements[0]);
}
- [TestCase(" ", Description = "Empty image")]
[TestCase(" ", Description = "Unsupported protocol")]
+ [TestCase(" ", Description = "Relative url without providing BaseImagerUri")]
public void IgnoreImage_ShouldBeIgnored(string html)
{
var elements = converter.Parse(html);
@@ -88,9 +90,20 @@ public async Task FileSystem_LocalImage_WithSpaceInName_ShouldSucceed()
await resourceStream.CopyToAsync(fileStream);
var localUri = "file:///" + filepath.TrimStart('/').Replace(" ", "%20");
- var elements = await converter.Parse($" ", CancellationToken.None);
+ var elements = await converter.ParseAsync($" ");
Assert.That(elements.Count(), Is.EqualTo(1));
- AssertIsImg(elements.First());
+ AssertIsImg(mainPart, elements.First());
+ }
+
+ [Test]
+ public void SvgNode_ReturnsImage()
+ { // Parses the content of the kiwi.svg resource and expects one image element backed by an "image/svg+xml" part.
+ var elements = converter.Parse(ResourceHelper.GetString("Resources.kiwi.svg"));
+ Assert.That(elements, Has.Count.EqualTo(1));
+ var (drawing, imagePart) = AssertIsImg(mainPart, elements[0]);
+ Assert.That(drawing.Inline!.DocProperties?.Name?.Value, Is.EqualTo("Illustration of a Kiwi")); // same text as in Resources/kiwi.svg (presumably its title element — markup is stripped here, confirm)
+ Assert.That(drawing.Inline!.DocProperties?.Description?.Value, Is.EqualTo("Kiwi (/ˈkiːwiː/ KEE-wee)[4] are flightless birds endemic to New Zealand of the order Apterygiformes.")); // same text as in Resources/kiwi.svg (presumably its desc element — confirm)
+ Assert.That(imagePart.ContentType, Is.EqualTo("image/svg+xml"));
}
[Test(Description = "Reading local file containing a space in the name")]
@@ -99,9 +112,9 @@ public async Task RemoteImage_WithBaseUri_ShouldSucceed()
converter = new HtmlConverter(mainPart, new IO.DefaultWebRequest() {
BaseImageUrl = new Uri("http://github.com/onizet/html2openxml")
});
- var elements = await converter.Parse($" ", CancellationToken.None);
+ var elements = await converter.ParseAsync($" ");
Assert.That(elements, Is.Not.Empty);
- AssertIsImg(elements.First());
+ AssertIsImg(mainPart, elements.First());
}
[Test(Description = "Image ID must be unique, amongst header, body and footer parts")]
@@ -123,7 +136,7 @@ public async Task ImageIds_IsUniqueAcrossPackagingParts()
Assert.That(beforeMaxDocPropId, Is.Not.Null);
HtmlConverter converter = new(mainPart);
- await converter.ParseHtml(" ");
+ await converter.ParseBody(" ");
mainPart.Document.Save();
var img = mainPart.Document.Body!.Descendants().FirstOrDefault();
@@ -148,24 +161,63 @@ public void WithIncompleteHeader_ShouldNotThrow() where T : OpenXmlPart, IFix
HtmlConverter converter = new(mainPart);
Assert.DoesNotThrowAsync(async () =>
- await converter.ParseHtml(" "));
+ await converter.ParseBody(" "));
}
- private Drawing AssertIsImg (OpenXmlCompositeElement element)
+ [TestCase(typeof(HeaderPart))]
+ [TestCase(typeof(FooterPart))]
+ [TestCase(typeof(MainDocumentPart))]
+ public async Task ParseIntoDocumentPart_ReturnsImageParentedToPart (Type openXmlPartType) // runs once per target part type
{
- var run = element.GetFirstChild();
+ string html = @" "; // NOTE(review): the HTML literal looks stripped in this patch text (presumably an <img>) — confirm
+ OpenXmlElement host;
+ OpenXmlPartContainer container;
+ // Parse into the requested part: `container` is the part passed to AssertIsImg, `host` the root element holding the output.
+ if (openXmlPartType == typeof(HeaderPart))
+ {
+ await converter.ParseHeader(html);
+ container = mainPart.HeaderParts.First();
+ host = mainPart.HeaderParts.First().Header;
+ }
+ else if (openXmlPartType == typeof(FooterPart))
+ {
+ await converter.ParseFooter(html);
+ container = mainPart.FooterParts.First();
+ host = mainPart.FooterParts.First().Footer;
+ }
+ else if (openXmlPartType == typeof(MainDocumentPart))
+ {
+ await converter.ParseBody(html);
+ container = mainPart;
+ host = mainPart.Document.Body!;
+ }
+ else
+ {
+ throw new NotSupportedException($"Test case not supported for {openXmlPartType.FullName}");
+ }
+ // The host must hold a single child, a paragraph, whose image part is resolved through `container`.
+ Assert.That(host.ChildElements, Has.Count.EqualTo(1));
+ var p = host.ChildElements.FirstOrDefault(c => c is Paragraph);
+ Assert.That(p, Is.Not.Null);
+ AssertIsImg(container, p);
+ AssertThatOpenXmlDocumentIsValid();
+ }
+
+ private static (Drawing, ImagePart) AssertIsImg (OpenXmlPartContainer container, OpenXmlElement paragraph) // returns the drawing found and the image part it embeds
+ { // Walks paragraph -> run -> drawing -> pic -> blip, asserting each step; NOTE(review): generic arguments of GetFirstChild look stripped — confirm
+ var run = paragraph.GetFirstChild();
Assert.That(run, Is.Not.Null);
- var img = run.GetFirstChild();
- Assert.That(img, Is.Not.Null);
- Assert.That(img.Inline?.Graphic?.GraphicData, Is.Not.Null);
- var pic = img.Inline.Graphic.GraphicData.GetFirstChild();
+ var drawing = run.GetFirstChild();
+ Assert.That(drawing, Is.Not.Null);
+ Assert.That(drawing.Inline?.Graphic?.GraphicData, Is.Not.Null);
+ var pic = drawing.Inline.Graphic.GraphicData.GetFirstChild();
Assert.That(pic?.BlipFill?.Blip?.Embed, Is.Not.Null);
var imagePartId = pic.BlipFill.Blip.Embed.Value;
Assert.That(imagePartId, Is.Not.Null);
- var part = mainPart.GetPartById(imagePartId);
- Assert.That(part, Is.TypeOf(typeof(ImagePart)));
- return img;
+ var imagePart = container.GetPartById(imagePartId); // looked up in the supplied container instead of always in mainPart
+ Assert.That(imagePart, Is.TypeOf(typeof(ImagePart))); // exact-type assertion guards the cast on the next line
+ return (drawing, (ImagePart) imagePart);
}
}
}
\ No newline at end of file
diff --git a/test/HtmlToOpenXml.Tests/LinkTests.cs b/test/HtmlToOpenXml.Tests/LinkTests.cs
index 147bf360..27443ce9 100644
--- a/test/HtmlToOpenXml.Tests/LinkTests.cs
+++ b/test/HtmlToOpenXml.Tests/LinkTests.cs
@@ -1,8 +1,12 @@
using NUnit.Framework;
using DocumentFormat.OpenXml.Wordprocessing;
+using DocumentFormat.OpenXml.Packaging;
+using DocumentFormat.OpenXml;
namespace HtmlToOpenXml.Tests
{
+ using a = DocumentFormat.OpenXml.Drawing;
+
///
/// Tests hyperlink.
///
@@ -15,29 +19,13 @@ public class LinkTests : HtmlConverterTestBase
public void ExternalLink_ShouldSucceed (string link)
{
var elements = converter.Parse($@"Test Caption ");
- Assert.That(elements, Has.Count.EqualTo(1));
- Assert.Multiple(() => {
- Assert.That(elements[0], Is.TypeOf(typeof(Paragraph)));
- Assert.That(elements[0].HasChild(), Is.True);
- });
- var hyperlink = elements[0].GetFirstChild()!;
- Assert.That(hyperlink.Tooltip, Is.Not.Null);
- Assert.That(hyperlink.Tooltip.Value, Is.EqualTo("Test Tooltip"));
- Assert.That(hyperlink.HasChild(), Is.True);
- Assert.That(elements[0].InnerText, Is.EqualTo("Test Caption"));
-
- Assert.That(hyperlink.Id, Is.Not.Null);
- Assert.That(hyperlink.History?.Value, Is.EqualTo(true));
- Assert.That(mainPart.HyperlinkRelationships.Count(), Is.GreaterThan(0));
-
- var extLink = mainPart.HyperlinkRelationships.FirstOrDefault(r => r.Id == hyperlink.Id);
- Assert.That(extLink, Is.Not.Null);
- Assert.That(extLink.IsExternal, Is.EqualTo(true));
- Assert.That(extLink.Uri.AbsoluteUri, Is.EqualTo("http://www.site.com/"));
+ AssertHyperlink(mainPart, elements);
}
[TestCase(@"Js ")]
[TestCase(@"Unknown site ")]
+ [TestCase(@"Empty link ")]
+ [TestCase(@"Empty bookmark ")]
public void InvalidLink_ReturnsSimpleRun (string html)
{
// invalid link leads to simple Run with no link
@@ -64,6 +52,20 @@ public void TextImageLink_ReturnsHyperlinkWithTextAndImage ()
Assert.That(hyperlink.LastChild?.InnerText, Is.EqualTo(" Test Caption"));
}
+ [Test(Description = "Assert that `figcaption` tag doesn't generate paragraphs")]
+ public void ImageFigcaptionLink_ReturnsHyperlinkWithTextAndImage ()
+ { // A link wrapping text, an image and a caption must stay one Hyperlink made of runs only.
+ var elements = converter.Parse(@"Go to
+
+ Caption for the image ");
+ Assert.That(elements[0].FirstChild, Is.TypeOf(typeof(Hyperlink)));
+ // The exact type was asserted just above, so the cast cannot fail here.
+ var hyperlink = (Hyperlink) elements[0].FirstChild;
+ Assert.That(hyperlink.ChildElements, Has.Count.EqualTo(4));
+ Assert.That(hyperlink.ChildElements, Has.All.TypeOf(typeof(Run)), "Hyperlinks don't accept inner paragraphs");
+ Assert.That(hyperlink.Descendants(), Is.Not.Empty); // Descendants never returns null, so the former Is.Not.Null check could not fail
+ }
+
[Test]
public void Anchoring_WithUnknownTarget_ReturnsHyperlinkWithBookmark ()
{
@@ -88,8 +90,8 @@ public void SetExcludeAnchoring_ReturnsSimpleRun ()
Assert.That(elements[0], Is.TypeOf(typeof(Paragraph)));
Assert.That(elements[0].HasChild(), Is.True);
- var hyperlink = (Hyperlink) elements[0].FirstChild!;
- Assert.That(hyperlink.Anchor?.Value, Is.EqualTo("_top"));
+ var hyperlink = elements[0].GetFirstChild();
+ Assert.That(hyperlink?.Anchor?.Value, Is.EqualTo("_top"));
// this should generate a Run and not an Hyperlink
elements = converter.Parse(@"Anchor3 ");
@@ -159,5 +161,119 @@ public void WithMultipleRun_ReturnsHyperlinkWithMultipleRuns()
Assert.That(h.ChildElements, Has.All.TypeOf(typeof(Run)));
Assert.That(h.InnerText, Is.EqualTo("Html to OpenXml !"));
}
+
+ [TestCase(typeof(HeaderPart))]
+ [TestCase(typeof(FooterPart))]
+ [TestCase(typeof(MainDocumentPart))]
+ public async Task ParseIntoDocumentPart_ReturnsHyperlinkParentedToPart (Type openXmlPartType)
+ {
+ // Parse the same link into the requested part, then check the hyperlink against that part.
+ string markup = @"Test Caption ";
+ OpenXmlPartContainer owner;
+ OpenXmlElement root;
+ switch (openXmlPartType)
+ {
+ case var kind when kind == typeof(HeaderPart):
+ await converter.ParseHeader(markup);
+ var headerPart = mainPart.HeaderParts.First();
+ owner = headerPart;
+ root = headerPart.Header;
+ break;
+ case var kind when kind == typeof(FooterPart):
+ await converter.ParseFooter(markup);
+ var footerPart = mainPart.FooterParts.First();
+ owner = footerPart;
+ root = footerPart.Footer;
+ break;
+ case var kind when kind == typeof(MainDocumentPart):
+ await converter.ParseBody(markup);
+ owner = mainPart;
+ root = mainPart.Document.Body!;
+ break;
+ default:
+ throw new NotSupportedException($"Test case not supported for {openXmlPartType.FullName}");
+ }
+
+ AssertHyperlink(owner, root.ChildElements);
+ AssertThatOpenXmlDocumentIsValid();
+ }
+
+ [TestCase("_top", Description = "Bookmark _top is reserved and stands in the top of the document")]
+ [TestCase("top", Description = "Alternate supported bookmark for user convenience")]
+ public async Task WithTopAnchoring_ReturnsAutoCreatedBookmark(string anchor)
+ {
+ await converter.ParseBody($"Move to top "); // NOTE(review): the interpolated HTML looks stripped in this patch text (presumably a link targeting `anchor`) — confirm
+ // The body is expected to hold three elements. NOTE(review): generic type arguments look stripped here, so the expected types are not visible — confirm.
+ Assert.That(mainPart.Document.Body!.Elements().Count(), Is.EqualTo(3));
+ Assert.That(mainPart.Document.Body!.FirstChild, Is.TypeOf());
+ Assert.That(mainPart.Document.Body!.ElementAt(1), Is.TypeOf());
+ Assert.That(mainPart.Document.Body!.LastChild, Is.TypeOf());
+ // First element: its first child must carry the reserved name `_top`.
+ var p = mainPart.Document.Body!.GetFirstChild()!;
+ Assert.That(p.GetFirstChild()?.Name?.Value, Is.EqualTo("_top"), "Reserved keyword `_top`");
+ // Last element: its first child must be anchored to `_top` for both test inputs ("_top" and "top").
+ p = mainPart.Document.Body!.GetLastChild()!;
+ Assert.That(p.GetFirstChild()?.Anchor?.Value, Is.EqualTo("_top"));
+ }
+
+ [Test(Description = "Bookmark must not stand as a single paragraph but inserted into the heading")]
+ public async Task WithHeading_ThenTopAnchoring_PrependBookmarkIntoHeading()
+ {
+ await converter.ParseBody(@"Heading 1
+ Move to top ");
+ // The first body child must be the Heading1 paragraph itself and carry the `_top` name. NOTE(review): generic arguments look stripped — confirm.
+ var p = mainPart.Document.Body!.GetFirstChild();
+ Assert.That(p, Is.Not.Null);
+ Assert.Multiple(() =>
+ {
+ Assert.That(p.GetFirstChild()?.Name?.Value, Is.EqualTo("_top"),
+ "Expected `_top` bookmark in the first body paragraph");
+ Assert.That(p.GetFirstChild(), Is.Not.Null);
+ Assert.That(p.ParagraphProperties?.ParagraphStyleId?.Val?.Value, Is.EqualTo("Heading1"),
+ "Expected first paragraph is the heading");
+ });
+ }
+
+ [Test(Description = "Clickable image pointing to `_top` bookmark requires additional link relationship")]
+ public async Task WithImageTopAnchoring_ReturnsClickableLink()
+ { // Checks that the drawing's link id resolves to a hyperlink relationship of mainPart whose URI is "#_top".
+ await converter.ParseBody(@"Move to top
+
+ "); // NOTE(review): the HTML inside this literal looks stripped in this patch text — confirm
+ var p = mainPart.Document.Body!.GetFirstChild();
+ var drawing = p?.Descendants().FirstOrDefault(); // NOTE(review): generic argument looks stripped — confirm
+ Assert.That(drawing, Is.Not.Null);
+ var linkTarget = drawing?.Inline?.DocProperties?.GetFirstChild()?.Id?.Value; // relationship id read from the first child of the doc properties
+ Assert.That(linkTarget, Is.Not.Null);
+ var rel = mainPart.HyperlinkRelationships.FirstOrDefault(r => r.Id == linkTarget);
+ Assert.That(rel, Is.Not.Null);
+ Assert.That(rel.Uri.ToString(), Is.EqualTo("#_top"));
+ }
+
+ private static void AssertHyperlink(OpenXmlPartContainer container, IEnumerable elements)
+ { // Expects exactly one paragraph whose hyperlink maps to an external relationship held by `container`.
+ Assert.That(elements.Count(), Is.EqualTo(1));
+ var paragraph = elements.First();
+ Assert.Multiple(() =>
+ {
+ Assert.That(paragraph, Is.TypeOf(typeof(Paragraph)));
+ Assert.That(paragraph.HasChild(), Is.True);
+ });
+ var link = paragraph.GetFirstChild()!;
+ Assert.That(link.Tooltip, Is.Not.Null);
+ Assert.That(link.Tooltip.Value, Is.EqualTo("Test Tooltip"));
+ Assert.That(link.HasChild(), Is.True);
+ Assert.That(paragraph.InnerText, Is.EqualTo("Test Caption"));
+ Assert.Multiple(() => {
+ Assert.That(link.Id, Is.Not.Null);
+ Assert.That(link.History?.Value, Is.EqualTo(true));
+ Assert.That(container.HyperlinkRelationships.Count(), Is.GreaterThan(0));
+ });
+
+ var target = container.HyperlinkRelationships.FirstOrDefault(rel => rel.Id == link.Id);
+ Assert.That(target, Is.Not.Null);
+ Assert.That(target.IsExternal, Is.EqualTo(true));
+ Assert.That(target.Uri.AbsoluteUri, Is.EqualTo("http://www.site.com/"));
+ }
}
}
\ No newline at end of file
diff --git a/test/HtmlToOpenXml.Tests/NumberingTests.cs b/test/HtmlToOpenXml.Tests/NumberingTests.cs
index eee33ef8..5288765e 100644
--- a/test/HtmlToOpenXml.Tests/NumberingTests.cs
+++ b/test/HtmlToOpenXml.Tests/NumberingTests.cs
@@ -131,6 +131,16 @@ public void EmptyList_ShouldBeIgnored()
}
}
+ [Test(Description = "Empty list item should not be registered")]
+ public void EmptyLiElement_ShouldBeIgnored()
+ { // Parsing a list that contains an empty item must not throw and must yield exactly one element.
+ var elements = converter.Parse(@""); // NOTE(review): the HTML literal looks stripped in this patch text (presumably a list with an empty <li>) — confirm
+ Assert.That(elements, Has.Count.EqualTo(1));
+ }
+
[Test(Description = "Increment instanceId based on existing lists")]
public void WithExistingNumbering_ReturnsUniqueInstanceId()
{
@@ -257,9 +267,9 @@ public void UseVariantStyle_ListItem_ReturnsAppliedStyle()
[Test(Description = "Resume indenting from existing numbering (default behaviour)")]
public async Task ContinueNumbering_ReturnsSecondList_ContinueOrder()
{
- await converter.ParseHtml(@"Item 1 ");
+ await converter.ParseBody(@"Item 1 ");
- await converter.ParseHtml("Item 2 ");
+ await converter.ParseBody("Item 2 ");
var absNum = mainPart.NumberingDefinitionsPart?.Numbering
.Elements()
@@ -281,15 +291,16 @@ public async Task ContinueNumbering_ReturnsSecondList_ContinueOrder()
e.ParagraphProperties?.NumberingProperties?.NumberingId?.Val?.Value),
Has.All.EqualTo(instances.First().NumberID!.Value),
"All paragraphs are linked to the same list instance");
+ AssertThatOpenXmlDocumentIsValid();
}
[Test(Description = "Stop indenting from existing numbering (issue #57)")]
public async Task DisableContinueNumbering_ReturnsSecondList_RestartingOrder()
{
- await converter.ParseHtml(@"Item 1 ");
+ await converter.ParseBody(@"Item 1 ");
converter.ContinueNumbering = false;
- await converter.ParseHtml("Item 2 ");
+ await converter.ParseBody("Item 2 ");
var absNum = mainPart.NumberingDefinitionsPart?.Numbering
.Elements()
@@ -311,6 +322,7 @@ public async Task DisableContinueNumbering_ReturnsSecondList_RestartingOrder()
e.ParagraphProperties?.NumberingProperties?.NumberingId?.Val?.Value),
Is.Unique,
"All paragraphs use different list instances");
+ AssertThatOpenXmlDocumentIsValid();
}
///
diff --git a/test/HtmlToOpenXml.Tests/Resources/kiwi.svg b/test/HtmlToOpenXml.Tests/Resources/kiwi.svg
new file mode 100644
index 00000000..9a5b8dd7
--- /dev/null
+++ b/test/HtmlToOpenXml.Tests/Resources/kiwi.svg
@@ -0,0 +1,30 @@
+
+
+
+
+
+ Illustration of a Kiwi
+
+
+ Kiwi (/ˈkiːwiː/ KEE-wee)[4] are flightless birds endemic to New Zealand of the order Apterygiformes.
+
+
+
+