diff --git a/.vscode/settings.json b/.vscode/settings.json
index e8962d50..9b58ce6b 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -1,9 +1,9 @@
{
- "omnisharp.organizeImportsOnFormat": true,
"dotnet.completion.showCompletionItemsFromUnimportedNamespaces": false,
"coverage-gutters.coverageFileNames":[
"coverage.info"
],
"coverage-gutters.showGutterCoverage": false,
- "coverage-gutters.showLineCoverage": true
+ "coverage-gutters.showLineCoverage": true,
+ "dotnet.formatting.organizeImportsOnFormat": true
}
\ No newline at end of file
diff --git a/CHANGELOG.md b/CHANGELOG.md
index bba08de9..7b0af9c8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,34 @@
# Changelog
+## 3.2.5
+
+- Fix a crash with the new whitespace handling introduced in 3.2.3 #191
+- Fix crash when the html contains 2 images with identical source path #193
+- Support margin auto for table alignment #194
+- Fix handling whitespace between runs #195
+- Whitelist more mime-types as specified by the IANA standard #196
+- Support EMF file #196
+- Correct handling of `figcaption` (allow nested phrasings) #197
+- Numbering list now supports the `type` attribute `<ol type="1|a|A|i|I">` #198
+- Always restart nested numbering list #198
+- Fix table borders being removed even when the specified word table style has borders #199
+- Defensive code when download image stream is truncated #201
+- Table inside list is constrained to not exceed page margin #202
+- Table now supports width:auto for auto-fit content #202
+
+## 3.2.4
+
+- Fix a crash with the new whitespace handling introduced in 3.2.3 #191
+- Table inside list must be aligned with the list item #192
+
+## 3.2.3
+
+- Improve support of table alignment #187
+- Fix a crash if a span is empty
+- Heading with only digits should not be considered as a numbering #189
+- Fix whitespaces inserted between spans #179 and #185
+- Support percentage size (typically width:100%) for img node #188
+
## 3.2.2
- Supports a feature to disable heading numbering #175
diff --git a/examples/Demo/Demo.csproj b/examples/Demo/Demo.csproj
index 3e76b9dc..d0b3a6f8 100644
--- a/examples/Demo/Demo.csproj
+++ b/examples/Demo/Demo.csproj
@@ -21,6 +21,7 @@
+
\ No newline at end of file
diff --git a/examples/Demo/Resources/LargeImg.html b/examples/Demo/Resources/LargeImg.html
new file mode 100644
index 00000000..79b187ce
--- /dev/null
+++ b/examples/Demo/Resources/LargeImg.html
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/examples/Demo/app.config b/examples/Demo/app.config
deleted file mode 100644
index 400b70b3..00000000
--- a/examples/Demo/app.config
+++ /dev/null
@@ -1,13 +0,0 @@
-
-
-
-
\ No newline at end of file
diff --git a/examples/Demo/images/The-Song-of-the-World.jpg b/examples/Demo/images/The-Song-of-the-World.jpg
new file mode 100644
index 00000000..5e1d8983
Binary files /dev/null and b/examples/Demo/images/The-Song-of-the-World.jpg differ
diff --git a/src/Html2OpenXml/Expressions/BlockElementExpression.cs b/src/Html2OpenXml/Expressions/BlockElementExpression.cs
index 62bc8ced..81a8a4f2 100644
--- a/src/Html2OpenXml/Expressions/BlockElementExpression.cs
+++ b/src/Html2OpenXml/Expressions/BlockElementExpression.cs
@@ -27,6 +27,7 @@ class BlockElementExpression: PhrasingElementExpression
{
private readonly OpenXmlLeafElement[]? defaultStyleProperties;
protected readonly ParagraphProperties paraProperties = new();
+ protected TableProperties? tableProperties;
// some style attributes, such as borders or bgcolor, will convert this node to a framed container
protected bool renderAsFramed;
private HtmlBorder styleBorder;
@@ -115,22 +116,44 @@ protected override IEnumerable Interpret (
public override void CascadeStyles(OpenXmlElement element)
{
base.CascadeStyles(element);
- if (!paraProperties.HasChildren || element is not Paragraph paragraph)
+ if (!paraProperties.HasChildren)
return;
- paragraph.ParagraphProperties ??= new ParagraphProperties();
-
- var knownTags = new HashSet();
- foreach (var prop in paragraph.ParagraphProperties)
+ if (element is Paragraph paragraph)
{
- if (!knownTags.Contains(prop.LocalName))
- knownTags.Add(prop.LocalName);
- }
+ paragraph.ParagraphProperties ??= new ParagraphProperties();
- foreach (var prop in paraProperties)
+ var knownTags = new HashSet();
+ foreach (var prop in paragraph.ParagraphProperties)
+ {
+ if (!knownTags.Contains(prop.LocalName))
+ knownTags.Add(prop.LocalName);
+ }
+
+ foreach (var prop in paraProperties)
+ {
+ if (!knownTags.Contains(prop.LocalName))
+ paragraph.ParagraphProperties.AddChild(prop.CloneNode(true));
+ }
+ }
+ else if (tableProperties != null && element is Table table)
{
- if (!knownTags.Contains(prop.LocalName))
- paragraph.ParagraphProperties.AddChild(prop.CloneNode(true));
+ var props = table.GetFirstChild();
+ if (props is null)
+ return;
+
+ var knownTags = new HashSet();
+ foreach (var prop in props)
+ {
+ if (!knownTags.Contains(prop.LocalName))
+ knownTags.Add(prop.LocalName);
+ }
+
+ foreach (var prop in tableProperties)
+ {
+ if (!knownTags.Contains(prop.LocalName))
+ props.AddChild(prop.CloneNode(true));
+ }
}
}
@@ -170,9 +193,12 @@ protected override void ComposeStyles (ParsingContext context)
JustificationValues? align = Converter.ToParagraphAlign(styleAttributes!["text-align"]);
if (!align.HasValue) align = Converter.ToParagraphAlign(node.GetAttribute("align"));
+ if (!align.HasValue) align = Converter.ToParagraphAlign(styleAttributes["justify-content"]);
if (align.HasValue)
{
paraProperties.Justification = new() { Val = align };
+ tableProperties ??= new();
+ tableProperties.TableJustification = new() { Val = align.Value.ToTableRowAlignment() };
}
@@ -194,7 +220,7 @@ protected override void ComposeStyles (ParsingContext context)
}
var margin = styleAttributes.GetMargin("margin");
- Indentation? indentation = null;
+ Indentation? indentation = null;
if (!margin.IsEmpty)
{
if (margin.Top.IsFixed || margin.Bottom.IsFixed)
@@ -345,7 +371,7 @@ private static Paragraph CreateParagraph(ParsingContext context, IList
-/// Process the parsing of a figcaption element, which is used to describe an image.
-///
-sealed class FigureCaptionExpression(IHtmlElement node) : PhrasingElementExpression(node)
-{
-
- ///
- public override IEnumerable Interpret (ParsingContext context)
- {
- ComposeStyles(context);
- var childElements = Interpret(context.CreateChild(this), node.ChildNodes);
- if (!childElements.Any())
- return [];
-
- var p = new Paragraph (
- new Run(
- new Text("Figure ") { Space = SpaceProcessingModeValues.Preserve }
- ),
- new SimpleField(
- new Run(
- new Text(AddFigureCaption(context).ToString(CultureInfo.InvariantCulture)))
- ) { Instruction = " SEQ Figure \\* ARABIC " }
- ) {
- ParagraphProperties = new ParagraphProperties {
- ParagraphStyleId = context.DocumentStyle.GetParagraphStyle(context.DocumentStyle.DefaultStyles.CaptionStyle),
- KeepNext = new KeepNext()
- }
- };
-
- if (childElements.First() is Run run) // any caption?
- {
- Text? t = run.GetFirstChild();
- if (t != null)
- t.Text = " " + t.InnerText; // append a space after the numero of the picture
- }
-
- return [p];
- }
-
- ///
- /// Add a new figure caption to the document.
- ///
- /// Returns the id of the new figure caption.
- private static int AddFigureCaption(ParsingContext context)
- {
- var figCaptionRef = context.Properties("figCaptionRef");
- if (!figCaptionRef.HasValue)
- {
- figCaptionRef = 0;
- foreach (var p in context.MainPart.Document.Descendants())
- {
- if (p.Instruction == " SEQ Figure \\* ARABIC ")
- figCaptionRef++;
- }
- }
- figCaptionRef++;
-
- context.Properties("figCaptionRef", figCaptionRef);
- return figCaptionRef.Value;
- }
-}
\ No newline at end of file
+/* Copyright (C) Olivier Nizet https://github.com/onizet/html2openxml - All Rights Reserved
+ *
+ * This source is subject to the Microsoft Permissive License.
+ * Please see the License.txt file for more information.
+ * All other rights reserved.
+ *
+ * THIS CODE AND INFORMATION ARE PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
+ * KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
+ * PARTICULAR PURPOSE.
+ */
+using System.Collections.Generic;
+using System.Globalization;
+using System.Linq;
+using AngleSharp.Html.Dom;
+using DocumentFormat.OpenXml;
+using DocumentFormat.OpenXml.Wordprocessing;
+
+namespace HtmlToOpenXml.Expressions;
+
+///
+/// Process the parsing of a figcaption element, which is used to describe an image.
+///
+sealed class FigureCaptionExpression(IHtmlElement node) : BlockElementExpression(node)
+{
+
+ ///
+ public override IEnumerable Interpret (ParsingContext context)
+ {
+ ComposeStyles(context);
+ var childElements = Interpret(context.CreateChild(this), node.ChildNodes);
+
+ var figNumRef = new List() {
+ new Run(
+ new Text("Figure ") { Space = SpaceProcessingModeValues.Preserve }
+ ),
+ new SimpleField(
+ new Run(
+ new Text(AddFigureCaption(context).ToString(CultureInfo.InvariantCulture)))
+ ) { Instruction = " SEQ Figure \\* ARABIC " }
+ };
+
+
+ if (!childElements.Any())
+ {
+ return [new Paragraph(figNumRef) {
+ ParagraphProperties = new ParagraphProperties {
+ ParagraphStyleId = context.DocumentStyle.GetParagraphStyle(context.DocumentStyle.DefaultStyles.CaptionStyle),
+ KeepNext = DetermineKeepNext(node),
+ }
+ }];
+ }
+
+ //Add the figure number references to the start of the first paragraph.
+ if(childElements.FirstOrDefault() is Paragraph p)
+ {
+ var properties = p.GetFirstChild();
+ p.InsertAfter(new Run(
+ new Text(" ") { Space = SpaceProcessingModeValues.Preserve }
+ ), properties);
+ p.InsertAfter(figNumRef[1], properties);
+ p.InsertAfter(figNumRef[0], properties);
+ }
+ else
+ {
+ // The first child of the figure caption is a table or something.
+ // Just prepend a new paragraph with the figure number reference.
+ childElements = [
+ new Paragraph(figNumRef),
+ ..childElements
+ ];
+ }
+
+ foreach (var paragraph in childElements.OfType())
+ {
+ paragraph.ParagraphProperties ??= new ParagraphProperties();
+ paragraph.ParagraphProperties.ParagraphStyleId ??= context.DocumentStyle.GetParagraphStyle(context.DocumentStyle.DefaultStyles.CaptionStyle);
+ //Keep caption paragraphs together.
+ paragraph.ParagraphProperties.KeepNext = new KeepNext();
+ }
+
+ if(childElements.OfType().LastOrDefault() is Paragraph lastPara)
+ {
+ lastPara.ParagraphProperties!.KeepNext = DetermineKeepNext(node);
+ }
+
+ return childElements;
+ }
+
+ ///
+ /// Add a new figure caption to the document.
+ ///
+ /// Returns the id of the new figure caption.
+ private static int AddFigureCaption(ParsingContext context)
+ {
+ var figCaptionRef = context.Properties("figCaptionRef");
+ if (!figCaptionRef.HasValue)
+ {
+ figCaptionRef = 0;
+ foreach (var p in context.MainPart.Document.Descendants())
+ {
+ if (p.Instruction == " SEQ Figure \\* ARABIC ")
+ figCaptionRef++;
+ }
+ }
+ figCaptionRef++;
+
+ context.Properties("figCaptionRef", figCaptionRef);
+ return figCaptionRef.Value;
+ }
+
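+ /// <summary>
+ /// Determines whether the KeepNext property should apply to this caption.
+ /// </summary>
+ /// <returns>A new <see cref="KeepNext"/>, or null when the caption has no next sibling element.</returns>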
+ private static KeepNext? DetermineKeepNext(IHtmlElement node)
+ {
+ // A caption at the end of a figure will have no next sibling.
+ if(node.NextElementSibling is null)
+ {
+ return null;
+ }
+ return new();
+ }
+}
diff --git a/src/Html2OpenXml/Expressions/HtmlDomExpression.cs b/src/Html2OpenXml/Expressions/HtmlDomExpression.cs
index 44ec6bc0..3efc82ef 100644
--- a/src/Html2OpenXml/Expressions/HtmlDomExpression.cs
+++ b/src/Html2OpenXml/Expressions/HtmlDomExpression.cs
@@ -38,9 +38,11 @@ private static Dictionary> InitKnownTa
{ TagNames.Abbr, el => new AbbreviationExpression((IHtmlElement) el) },
{ "acronym", el => new AbbreviationExpression((IHtmlElement) el) },
{ TagNames.B, el => new PhrasingElementExpression((IHtmlElement) el, new Bold()) },
+ { TagNames.Big, el => new PhrasingElementExpression((IHtmlElement) el, new FontSize() { Val = "36" }) },
{ TagNames.BlockQuote, el => new BlockQuoteExpression((IHtmlElement) el) },
{ TagNames.Br, _ => new LineBreakExpression() },
{ TagNames.Cite, el => new CiteElementExpression((IHtmlElement) el) },
+ { TagNames.Code, el => new PhrasingElementExpression((IHtmlElement) el) },
{ TagNames.Dd, el => new BlockElementExpression((IHtmlElement) el, new Indentation() { FirstLine = "708" }, new SpacingBetweenLines() { After = "0" }) },
{ TagNames.Del, el => new PhrasingElementExpression((IHtmlElement) el, new Strike()) },
{ TagNames.Dfn, el => new AbbreviationExpression((IHtmlElement) el) },
@@ -57,10 +59,18 @@ private static Dictionary> InitKnownTa
{ TagNames.Hr, el => new HorizontalLineExpression((IHtmlElement) el) },
{ TagNames.Img, el => new ImageExpression((IHtmlImageElement) el) },
{ TagNames.Ins, el => new PhrasingElementExpression((IHtmlElement) el, new Underline() { Val = UnderlineValues.Single }) },
+ { TagNames.Kbd, el => new PhrasingElementExpression((IHtmlElement) el) },
+ { TagNames.Mark, el => new PhrasingElementExpression((IHtmlElement) el, new Shading { Val = ShadingPatternValues.Clear, Fill = "FFFF00" /* yellow */ }) },
+ { TagNames.NoBr, el => new PhrasingElementExpression((IHtmlElement) el) },
{ TagNames.Ol, el => new ListExpression((IHtmlElement) el) },
{ TagNames.Pre, el => new PreElementExpression((IHtmlElement) el) },
{ TagNames.Q, el => new QuoteElementExpression((IHtmlElement) el) },
{ TagNames.Quote, el => new QuoteElementExpression((IHtmlElement) el) },
+ { TagNames.Rb, el => new PhrasingElementExpression((IHtmlElement) el) },
+ { TagNames.Rt, el => new PhrasingElementExpression((IHtmlElement) el) },
+ { TagNames.Ruby, el => new BlockElementExpression((IHtmlElement) el) },
+ { TagNames.Samp, el => new PhrasingElementExpression((IHtmlElement) el) },
+ { TagNames.Small, el => new PhrasingElementExpression((IHtmlElement) el, new FontSize() { Val = "20" }) },
{ TagNames.Span, el => new PhrasingElementExpression((IHtmlElement) el) },
{ TagNames.S, el => new PhrasingElementExpression((IHtmlElement) el, new Strike()) },
{ TagNames.Strike, el => new PhrasingElementExpression((IHtmlElement) el, new Strike()) },
@@ -70,8 +80,10 @@ private static Dictionary> InitKnownTa
{ TagNames.Svg, el => new SvgExpression((AngleSharp.Svg.Dom.ISvgSvgElement) el) },
{ TagNames.Table, el => new TableExpression((IHtmlTableElement) el) },
{ TagNames.Time, el => new PhrasingElementExpression((IHtmlElement) el) },
+ { TagNames.Tt, el => new PhrasingElementExpression((IHtmlElement) el) },
{ TagNames.U, el => new PhrasingElementExpression((IHtmlElement) el, new Underline() { Val = UnderlineValues.Single }) },
{ TagNames.Ul, el => new ListExpression((IHtmlElement) el) },
+ { TagNames.Var, el => new PhrasingElementExpression((IHtmlElement) el) }
};
return knownTags;
@@ -83,7 +95,6 @@ private static Dictionary> InitKnownTa
/// The parsing context.
public abstract IEnumerable Interpret (ParsingContext context);
-
///
/// Create a new interpreter for the given html tag.
///
diff --git a/src/Html2OpenXml/Expressions/Image/ImageExpression.cs b/src/Html2OpenXml/Expressions/Image/ImageExpression.cs
index a4b61c30..c1c4df32 100644
--- a/src/Html2OpenXml/Expressions/Image/ImageExpression.cs
+++ b/src/Html2OpenXml/Expressions/Image/ImageExpression.cs
@@ -56,9 +56,14 @@ class ImageExpression(IHtmlImageElement node) : ImageExpressionBase(node)
}
if (imgNode.DisplayHeight > 0)
{
- // Image perspective skewed. Bug fixed by ddeforge on github.com/onizet/html2openxml/discussions/350500
preferredSize.Height = imgNode.DisplayHeight;
}
+ if (preferredSize.IsEmpty)
+ {
+ var styles = imgNode.GetStyles();
+ preferredSize.Width = GetDimension(styles, "width", "max-width", 642);
+ preferredSize.Height = GetDimension(styles, "height", "max-height", 428);
+ }
HtmlImageInfo? iinfo = context.ImageLoader.Download(src, CancellationToken.None)
.ConfigureAwait(false).GetAwaiter().GetResult();
@@ -85,6 +90,7 @@ class ImageExpression(IHtmlImageElement node) : ImageExpressionBase(node)
else if (preferredSize.Width <= 0 || preferredSize.Height <= 0)
{
Size actualSize = iinfo.Size;
+ // Image perspective skewed. Bug fixed by ddeforge on github.com/onizet/html2openxml/discussions/350500
preferredSize = ImageHeader.KeepAspectRatio(actualSize, preferredSize);
}
@@ -130,4 +136,22 @@ class ImageExpression(IHtmlImageElement node) : ImageExpressionBase(node)
return img;
}
+
+ private static int GetDimension(HtmlAttributeCollection styles, string primaryStyle, string fallbackStyle, int percentageBase)
+ {
+ var unit = styles.GetUnit(primaryStyle);
+ if (!unit.IsValid)
+ {
+ unit = styles.GetUnit(fallbackStyle);
+ }
+
+ if (unit.IsValid)
+ {
+ return unit.Type == UnitMetric.Percent?
+ (int)(unit.Value * percentageBase / 100) :
+ unit.ValueInPx;
+ }
+
+ return 0;
+ }
}
\ No newline at end of file
diff --git a/src/Html2OpenXml/Expressions/Numbering/HeadingElementExpression.cs b/src/Html2OpenXml/Expressions/Numbering/HeadingElementExpression.cs
index 9a9f6ffd..1039fae8 100644
--- a/src/Html2OpenXml/Expressions/Numbering/HeadingElementExpression.cs
+++ b/src/Html2OpenXml/Expressions/Numbering/HeadingElementExpression.cs
@@ -9,6 +9,7 @@
* IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
* PARTICULAR PURPOSE.
*/
+using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;
@@ -23,7 +24,8 @@ namespace HtmlToOpenXml.Expressions;
///
sealed class HeadingElementExpression(IHtmlElement node) : NumberingExpressionBase(node)
{
- private static readonly Regex numberingRegex = new(@"^\s*(\d+\.?)*\s*");
+ private static readonly Regex numberingRegex = new(@"^\s*(?[0-9\.]+\s*)[^0-9]",
+ RegexOptions.Compiled, TimeSpan.FromMilliseconds(100));
///
public override IEnumerable Interpret (ParsingContext context)
@@ -36,7 +38,7 @@ public override IEnumerable Interpret (ParsingContext context)
var paragraph = childElements.FirstOrDefault() as Paragraph;
- paragraph ??= new Paragraph(childElements);
+ paragraph ??= new(childElements);
paragraph.ParagraphProperties ??= new();
paragraph.ParagraphProperties.ParagraphStyleId =
context.DocumentStyle.GetParagraphStyle(context.DocumentStyle.DefaultStyles.HeadingStyle + level);
@@ -65,16 +67,30 @@ public override IEnumerable Interpret (ParsingContext context)
private static bool IsNumbering(OpenXmlElement runElement)
{
+ if (runElement.InnerText is null)
+ return false;
+
// Check if the line starts with a number format (1., 1.1., 1.1.1.)
// If it does, make sure we make the heading a numbered item
- Match regexMatch = numberingRegex.Match(runElement.InnerText ?? string.Empty);
+ var headingText = runElement.InnerText;
+ Match regexMatch;
+ try
+ {
+ regexMatch = numberingRegex.Match(headingText);
+ }
+ catch (RegexMatchTimeoutException)
+ {
+ return false;
+ }
+
// Make sure we only grab the heading if it starts with a number
- if (regexMatch.Groups.Count > 1 && regexMatch.Groups[1].Captures.Count > 0)
+ if (regexMatch.Success && headingText.Length > regexMatch.Groups["number"].Length)
{
- // Strip numbers from text
+ // Strip numbers from text
+ headingText = headingText.Substring(regexMatch.Groups["number"].Length);
runElement.InnerXml = runElement.InnerXml
- .Replace(runElement.InnerText!, runElement.InnerText!.Substring(regexMatch.Length));
+ .Replace(runElement.InnerText!, headingText);
return true;
}
diff --git a/src/Html2OpenXml/Expressions/Numbering/ListExpression.cs b/src/Html2OpenXml/Expressions/Numbering/ListExpression.cs
index 3fa20af3..ed7af86d 100644
--- a/src/Html2OpenXml/Expressions/Numbering/ListExpression.cs
+++ b/src/Html2OpenXml/Expressions/Numbering/ListExpression.cs
@@ -1,4 +1,4 @@
-/* Copyright (C) Olivier Nizet https://github.com/onizet/html2openxml - All Rights Reserved
+/* Copyright (C) Olivier Nizet https://github.com/onizet/html2openxml - All Rights Reserved
*
* This source is subject to the Microsoft Permissive License.
* Please see the License.txt file for more information.
@@ -91,6 +91,24 @@ public override IEnumerable Interpret(ParsingContext context)
var childElements = expression.Interpret(context);
if (!childElements.Any()) continue;
+ // table must be aligned to the list item
+ var tables = childElements.OfType
();
+ var tableIndentation = level * Indentation * 2;
+ foreach (var table in tables)
+ {
+ var tableProperties = table.GetFirstChild();
+ if (tableProperties == null)
+ table.PrependChild(tableProperties = new());
+
+ tableProperties.TableIndentation ??= new() { Width = tableIndentation };
+ // ensure to restrain the table width to the list item
+ if (tableProperties.TableWidth?.Type?.Value == TableWidthUnitValues.Pct
+ && tableProperties.TableWidth?.Width?.Value == "5000")
+ {
+ tableProperties.TableWidth.Width = (5000 - tableIndentation).ToString();
+ }
+ }
+
// ensure to filter out any non-paragraph like any nested table
var paragraphs = childElements.OfType();
var listItemStyleId = GetStyleIdForListItem(context.DocumentStyle, liNode);
@@ -143,7 +161,15 @@ private ListContext ConcretiseInstance(ParsingContext context, int abstractNumId
int overrideLevelIndex = 0;
var isOrderedTag = node.NodeName.Equals("ol", StringComparison.OrdinalIgnoreCase);
var dir = node.GetTextDirection();
- if (!instanceId.HasValue || context.Converter.ContinueNumbering == false)
+
+ // be sure to restart to 1 any nested ordered list
+ if (currentLevel > 0 && isOrderedTag)
+ {
+ instanceId = IncrementInstanceId(context, abstractNumId, isReusable: false);
+ overrideLevelIndex = currentLevel;
+ listContext = new ListContext(listStyle, abstractNumId, instanceId.Value, currentLevel + 1, dir);
+ }
+ else if (!instanceId.HasValue || context.Converter.ContinueNumbering == false)
{
// create a new instance of that list template
instanceId = IncrementInstanceId(context, abstractNumId, isReusable: context.Converter.ContinueNumbering);
@@ -158,13 +184,6 @@ private ListContext ConcretiseInstance(ParsingContext context, int abstractNumId
instanceId = IncrementInstanceId(context, abstractNumId, isReusable: false);
listContext = new ListContext(listStyle, abstractNumId, instanceId.Value, 1, dir);
}
- // be sure to restart to 1 any nested ordered list
- else if (currentLevel > 0 && isOrderedTag)
- {
- instanceId = IncrementInstanceId(context, abstractNumId, isReusable: false);
- overrideLevelIndex = currentLevel;
- listContext = new ListContext(listStyle, abstractNumId, instanceId.Value, currentLevel + 1, dir);
- }
else
{
return new ListContext(listStyle, abstractNumId, instanceId.Value, currentLevel + 1, dir);
@@ -197,20 +216,39 @@ private ListContext ConcretiseInstance(ParsingContext context, int abstractNumId
private static string GetListName(IElement listNode, string? parentName = null)
{
var styleAttributes = listNode.GetStyles();
+ bool orderedList = listNode.NodeName.Equals("ol", StringComparison.OrdinalIgnoreCase);
string? type = styleAttributes["list-style-type"];
+ if(orderedList && string.IsNullOrEmpty(type))
+ {
+ type = ListTypeToListStyleType(listNode.GetAttribute("type"));
+ }
+
if (string.IsNullOrEmpty(type) || !supportedListTypes.Contains(type!))
{
if (parentName != null && IsCascadingStyle(parentName))
return parentName!;
- bool orderedList = listNode.NodeName.Equals("ol", StringComparison.OrdinalIgnoreCase);
type = orderedList? "decimal" : "disc";
}
return type!;
}
+ /// <summary>
+ /// Map the value of the <c>type</c> attribute of an ordered list to its css list-style-type equivalent.
+ /// Valid types are "1|a|A|i|I" (https://w3schools.com/tags/att_ol_type.asp); any other value maps to null.
+ /// </summary>
+ private static string? ListTypeToListStyleType(string? type) => type switch
+ {
+ "1" => "decimal",
+ "a" => "lower-alpha",
+ "A" => "upper-alpha",
+ "i" => "lower-roman",
+ "I" => "upper-roman",
+ _ => null
+ };
+
///
/// Resolve the of a list element node,
/// based on its css class if provided and if matching.
@@ -238,4 +276,4 @@ private static bool IsCascadingStyle(string styleName)
{
return styleName == "decimal-tiered";
}
-}
\ No newline at end of file
+}
diff --git a/src/Html2OpenXml/Expressions/PhrasingElementExpression.cs b/src/Html2OpenXml/Expressions/PhrasingElementExpression.cs
index c7619824..ec489ef4 100644
--- a/src/Html2OpenXml/Expressions/PhrasingElementExpression.cs
+++ b/src/Html2OpenXml/Expressions/PhrasingElementExpression.cs
@@ -1,4 +1,4 @@
-/* Copyright (C) Olivier Nizet https://github.com/onizet/html2openxml - All Rights Reserved
+/* Copyright (C) Olivier Nizet https://github.com/onizet/html2openxml - All Rights Reserved
*
* This source is subject to the Microsoft Permissive License.
* Please see the License.txt file for more information.
@@ -12,10 +12,8 @@
using System;
using System.Collections.Generic;
using System.Globalization;
-using System.Linq;
using AngleSharp.Dom;
using AngleSharp.Html.Dom;
-using AngleSharp.Text;
using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Wordprocessing;
@@ -56,7 +54,8 @@ protected virtual IEnumerable Interpret (
runs.Add(element);
}
}
- return CombineRuns(runs);
+
+ return runs;
}
public override void CascadeStyles(OpenXmlElement element)
@@ -189,40 +188,4 @@ protected virtual void ComposeStyles (ParsingContext context)
if (font.Size.IsFixed)
runProperties.FontSize = new FontSize() { Val = Math.Round(font.Size.ValueInPoint * 2).ToString(CultureInfo.InvariantCulture) };
}
-
- ///
- /// Mimics the behaviour of Html rendering when 2 consecutives runs are separated by a space.
- ///
- protected static IEnumerable CombineRuns(IEnumerable runs)
- {
- if (runs.Count() == 1)
- {
- yield return runs.First();
- yield break;
- }
-
- bool endsWithSpace = true;
- foreach (var run in runs)
- {
- var textElement = run.GetFirstChild();
- // run can be also a hyperlink
- textElement ??= run.GetFirstChild()?.GetFirstChild();
-
- if (textElement != null) // could be null when
- {
- var text = textElement.Text;
- // we know that the text cannot be empty because we skip them in TextExpression
- if (!endsWithSpace && !text[0].IsSpaceCharacter())
- {
- yield return new Run(new Text(" ") { Space = SpaceProcessingModeValues.Preserve });
- }
- endsWithSpace = text[text.Length - 1].IsSpaceCharacter();
- }
- else if (run.LastChild is Break)
- {
- endsWithSpace = true;
- }
- yield return run;
- }
- }
-}
\ No newline at end of file
+}
diff --git a/src/Html2OpenXml/Expressions/Table/TableCellExpression.cs b/src/Html2OpenXml/Expressions/Table/TableCellExpression.cs
index 4e816f89..f9d60817 100644
--- a/src/Html2OpenXml/Expressions/Table/TableCellExpression.cs
+++ b/src/Html2OpenXml/Expressions/Table/TableCellExpression.cs
@@ -31,9 +31,6 @@ public override IEnumerable Interpret (ParsingContext context)
{
var childElements = base.Interpret (context);
- if (!childElements.Any()) // Word requires that the cell is not empty
- childElements = [new Paragraph()];
-
var cell = new TableCell (cellProperties);
if (cellNode.ColumnSpan > 1)
@@ -46,6 +43,13 @@ public override IEnumerable Interpret (ParsingContext context)
cellProperties.VerticalMerge = new() { Val = MergedCellValues.Restart };
}
+ // Word requires at least one paragraph in a cell
+ // OpenXmlValidator does not catch this error
+ if (!childElements.Any(c => c is Paragraph))
+ {
+ childElements = childElements.Append(new Paragraph());
+ }
+
cell.Append(childElements);
return [cell];
}
diff --git a/src/Html2OpenXml/Expressions/Table/TableExpression.cs b/src/Html2OpenXml/Expressions/Table/TableExpression.cs
index 84c1d029..3faea0ff 100644
--- a/src/Html2OpenXml/Expressions/Table/TableExpression.cs
+++ b/src/Html2OpenXml/Expressions/Table/TableExpression.cs
@@ -156,7 +156,7 @@ private static int GuessColumnsCount(IHtmlTableElement tableNode)
}
}
- if (rows.Any())
+ if (rows.Length > 0)
columnCount = Math.Max(rows.Max(), columnCount);
}
@@ -186,6 +186,9 @@ protected override void ComposeStyles (ParsingContext context)
tableProperties.TableWidth = new() { Type = TableWidthUnitValues.Dxa,
Width = width.ValueInDxa.ToString(CultureInfo.InvariantCulture) };
break;
+ case UnitMetric.Auto:
+ tableProperties.TableWidth = new() { Width = "0", Type = TableWidthUnitValues.Auto };
+ break;
}
foreach (string className in tableNode.ClassList)
@@ -198,10 +201,6 @@ protected override void ComposeStyles (ParsingContext context)
}
}
- var align = Converter.ToParagraphAlign(tableNode.GetAttribute("align"));
- if (align.HasValue)
- tableProperties.TableJustification = new() { Val = align.Value.ToTableRowAlignment() };
-
var dir = tableNode.GetTextDirection();
if (dir.HasValue)
tableProperties.BiDiVisual = new() {
@@ -243,7 +242,8 @@ protected override void ComposeStyles (ParsingContext context)
tableProperties.TableBorders = tableBorders;
}
// is the border=0? If so, we remove the border regardless the style in use
- else if (tableNode.Border == 0)
+ // but only remove border if the html style border was set, otherwise leave the border style as-is.
+ else if (!styleBorder.IsEmpty && tableNode.Border == 0)
{
tableProperties.TableBorders = new TableBorders() {
TopBorder = new TopBorder { Val = BorderValues.None },
@@ -280,5 +280,22 @@ protected override void ComposeStyles (ParsingContext context)
};
}
}
+
+ var align = Converter.ToParagraphAlign(tableNode.GetAttribute("align"))
+ ?? Converter.ToParagraphAlign(styleAttributes["justify-self"]);
+ if (!align.HasValue)
+ {
+ var margin = styleAttributes.GetMargin("margin");
+ if (margin.Left.Type == UnitMetric.Auto)
+ {
+ if (margin.Right.Type == UnitMetric.Auto)
+ align = JustificationValues.Center;
+ else
+ align = JustificationValues.Right;
+ }
+ }
+
+ if (align.HasValue)
+ tableProperties.TableJustification = new() { Val = align.Value.ToTableRowAlignment() };
}
}
diff --git a/src/Html2OpenXml/Expressions/TextExpression.cs b/src/Html2OpenXml/Expressions/TextExpression.cs
index 427e364b..dd2705df 100644
--- a/src/Html2OpenXml/Expressions/TextExpression.cs
+++ b/src/Html2OpenXml/Expressions/TextExpression.cs
@@ -9,6 +9,10 @@
* IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
* PARTICULAR PURPOSE.
*/
+using System;
+#if NET8_0_OR_GREATER
+using System.Collections.Frozen;
+#endif
using System.Collections.Generic;
using AngleSharp.Dom;
using AngleSharp.Html.Dom;
@@ -23,55 +27,161 @@ namespace HtmlToOpenXml.Expressions;
///
sealed class TextExpression(INode node) : HtmlDomExpression
{
+ static readonly ISet AllPhrasings = InitPhrasingSets();
private readonly INode node = node;
+ private static ISet InitPhrasingSets() // builds the case-insensitive lookup of phrasing (inline) tag names
+ {
+ var sets = new HashSet(StringComparer.InvariantCultureIgnoreCase) {
+ TagNames.A, TagNames.Abbr, TagNames.B, TagNames.Big, TagNames.Cite, TagNames.Code,
+ TagNames.Del, TagNames.Dfn, TagNames.Em, TagNames.Font, TagNames.Hr, TagNames.I,
+ TagNames.Img, TagNames.Ins, TagNames.Kbd, TagNames.Mark, TagNames.NoBr, TagNames.Q,
+ TagNames.Rp, TagNames.Rt, TagNames.S, TagNames.Samp, TagNames.Small, TagNames.Span,
+ TagNames.Strike, TagNames.Strong, TagNames.Sub, TagNames.Sup, TagNames.Time,
+ TagNames.Tt, TagNames.U, TagNames.Var
+ };
+ // FrozenSet lives in System.Collections.Frozen, which only ships with .NET 8 and later
+#if NET8_0_OR_GREATER
+ return sets.ToFrozenSet(StringComparer.InvariantCultureIgnoreCase);
+#else
+ return sets; // older targets: return the mutable HashSet as-is
+#endif
+ }
+
///
public override IEnumerable Interpret (ParsingContext context)
{
string text = node.TextContent.Normalize();
- if (text.Trim().Length == 0) return [];
+
+ if (text.Length == 0)
+ return [];
if (!context.PreserveLinebreaks)
- text = text.CollapseLineBreaks();
- if (context.CollapseWhitespaces && text[0].IsWhiteSpaceCharacter() &&
- node.PreviousSibling is IHtmlImageElement)
{
- text = " " + text.CollapseAndStrip();
+ text = text.CollapseLineBreaks();
+ if (text.Length == 0)
+ return [];
}
- else if (context.CollapseWhitespaces)
+
+ // https://developer.mozilla.org/en-US/docs/Web/API/Document_Object_Model/Whitespace
+ // If there is a space between two phrasing elements, the user agent should collapse it to a single space character.
+ if (context.CollapseWhitespaces)
+ {
+ bool startsWithSpace = text[0].IsWhiteSpaceCharacter(),
+ endsWithSpace = text[text.Length - 1].IsWhiteSpaceCharacter(),
+ preserveBorderSpaces = AllPhrasings.Contains(node.Parent!.NodeName),
+ prevIsPhrasing = node.PreviousSibling is not null &&
+ (AllPhrasings.Contains(node.PreviousSibling.NodeName) || node.PreviousSibling!.NodeType == NodeType.Text),
+ nextIsPhrasing = node.NextSibling is not null &&
+ (AllPhrasings.Contains(node.NextSibling.NodeName) || node.NextSibling!.NodeType == NodeType.Text);
+
text = text.CollapseAndStrip();
+ // keep a single collapsed space if it stands between 2 phrasings that, respectively,
+ // don't end/start with a whitespace
+ if (text.Length == 0 && prevIsPhrasing && nextIsPhrasing
+ && (endsWithSpace || startsWithSpace)
+ && !(node.PreviousSibling!.TextContent.Length == 0
+ || node.NextSibling!.TextContent.Length == 0
+ || node.PreviousSibling!.TextContent[node.PreviousSibling!.TextContent.Length - 1].IsWhiteSpaceCharacter()
+ || node.NextSibling!.TextContent[0].IsWhiteSpaceCharacter()
+ ))
+ {
+ return [new Run(new Text(" ") { Space = SpaceProcessingModeValues.Preserve })];
+ }
+ // all whitespaces were stripped out and the parent is not a phrasing element (e.g. a div): just skip this text content
+ if (text.Length == 0 && !preserveBorderSpaces)
+ {
+ return [];
+ }
+
+ // if the previous element is an image, prepend a space separator
+ // if the previous sibling is a non-empty phrasing that doesn't end with a whitespace, prepend a space separator
+ if (startsWithSpace && node.PreviousSibling is IHtmlImageElement)
+ {
+ text = " " + text;
+ }
+ else if (startsWithSpace && prevIsPhrasing
+ && node.PreviousSibling!.TextContent.Length > 0
+ && !node.PreviousSibling!.TextContent[node.PreviousSibling.TextContent.Length - 1].IsWhiteSpaceCharacter())
+ {
+ text = " " + text;
+ }
+
+ if (endsWithSpace && (
+ // next run is not starting with a linebreak
+ (nextIsPhrasing && node.NextSibling!.TextContent.Length > 0 &&
+ !node.NextSibling!.TextContent[0].IsLineBreak()) ||
+ // inside a phrasing, keep the trailing space only if this node or its parent has a next sibling; otherwise eat it
+ (preserveBorderSpaces && (node.NextSibling is not null
+ || node.Parent.NextSibling is not null))))
+ {
+ text += " ";
+ }
+ }
+
+
+ if (text.Length == 0)
+ return [];
+
if (!context.PreserveLinebreaks)
- return [new Run(new Text(text))];
+ return [new Run(new Text(text) { Space = SpaceProcessingModeValues.Preserve })];
+
+ Run run = EscapeNewlines(text);
+ return [run];
+ }
+ ///
+ /// Convert new lines to Break elements.
+ ///
+ private static Run EscapeNewlines(string text)
+ {
var run = new Run();
- char[] chars = text.ToCharArray();
- int shift = 0, c = 0;
bool wasCR = false; // avoid adding 2 breaks for \r\n
- for ( ; c < chars.Length ; c++)
+
+ int startIndex = 0;
+ for (int i = 0; i < text.Length; i++)
{
- if (!chars[c].IsLineBreak())
- {
- wasCR = false;
+ if (!IsLineBreak(text[i], ref wasCR))
continue;
- }
- if (wasCR) continue;
- wasCR = chars[c] == Symbols.CarriageReturn;
-
- if (c > 1)
+ // Add the text before the newline character
+ if (i > startIndex)
{
- run.Append(new Text(new string(chars, shift, c - shift))
+ run.Append(new Text(text.Substring(startIndex, i - startIndex))
{ Space = SpaceProcessingModeValues.Preserve });
run.Append(new Break());
}
- shift = c + 1;
+
+ startIndex = i + 1;
}
- if (c > shift)
- run.Append(new Text(new string(chars, shift, c - shift))
+ // Add any remaining text after the last newline character
+ if (startIndex < text.Length)
+ {
+ run.Append(new Text(text.Substring(startIndex))
{ Space = SpaceProcessingModeValues.Preserve });
+ }
- return [run];
+ return run;
+ }
+
+ private static bool IsLineBreak(char ch, ref bool wasCR)
+ {
+ if (ch == Symbols.CarriageReturn)
+ {
+ wasCR = true;
+ return true;
+ }
+
+ if (ch == Symbols.LineFeed && wasCR)
+ {
+ // Skip LF character after CR to avoid adding an extra break for CR-LF sequence
+ wasCR = false;
+ return false;
+ }
+
+ wasCR = false;
+ return ch == Symbols.LineFeed;
}
}
diff --git a/src/Html2OpenXml/HtmlConverter.cs b/src/Html2OpenXml/HtmlConverter.cs
index f834ecf2..5fb7f06a 100755
--- a/src/Html2OpenXml/HtmlConverter.cs
+++ b/src/Html2OpenXml/HtmlConverter.cs
@@ -15,6 +15,7 @@
using System.Threading;
using System.Threading.Tasks;
using AngleSharp;
+using AngleSharp.Html.Dom;
using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;
@@ -125,7 +126,8 @@ public async Task ParseHeader(string html, HeaderFooterValues? headerType = null
var paragraphs = await ParseCoreAsync(html, headerPart, headerImageLoader,
new ParallelOptions() { CancellationToken = cancellationToken },
- htmlStyles.GetParagraphStyle(htmlStyles.DefaultStyles.HeaderStyle));
+ htmlStyles.GetParagraphStyle(htmlStyles.DefaultStyles.HeaderStyle))
+ .ConfigureAwait(false);
headerPart.Header.Append(paragraphs);
}
@@ -149,7 +151,8 @@ public async Task ParseFooter(string html, HeaderFooterValues? footerType = null
var paragraphs = await ParseCoreAsync(html, footerPart, footerImageLoader,
new ParallelOptions() { CancellationToken = cancellationToken },
- htmlStyles.GetParagraphStyle(htmlStyles.DefaultStyles.FooterStyle));
+ htmlStyles.GetParagraphStyle(htmlStyles.DefaultStyles.FooterStyle))
+ .ConfigureAwait(false);
footerPart.Footer.Append(paragraphs);
}
@@ -165,7 +168,8 @@ public async Task ParseBody(string html, CancellationToken cancellationToken = d
bodyImageLoader ??= new ImagePrefetcher(mainPart, webRequester);
var paragraphs = await ParseCoreAsync(html, mainPart, bodyImageLoader,
new ParallelOptions() { CancellationToken = cancellationToken },
- htmlStyles.GetParagraphStyle(htmlStyles.DefaultStyles.Paragraph));
+ htmlStyles.GetParagraphStyle(htmlStyles.DefaultStyles.Paragraph))
+ .ConfigureAwait(false);
if (!paragraphs.Any())
return;
@@ -274,13 +278,14 @@ private async Task> ParseCoreAsync(string h
///
/// Walk through all the img tags and preload all the remote images.
///
- private async Task PreloadImages(AngleSharp.Dom.IDocument htmlDocument,
+ private static async Task PreloadImages(AngleSharp.Dom.IDocument htmlDocument,
IImageLoader imageLoader, ParallelOptions parallelOptions)
{
var imageUris = htmlDocument.QuerySelectorAll("img[src]")
- .Cast()
+ .Cast()
.Where(e => AngleSharpExtensions.TryParseUrl(e.GetAttribute("src"), UriKind.RelativeOrAbsolute, out var _))
- .Select(e => e.GetAttribute("src")!);
+ .Select(e => e.GetAttribute("src")!)
+ .Distinct();
if (!imageUris.Any())
return;
diff --git a/src/Html2OpenXml/HtmlToOpenXml.csproj b/src/Html2OpenXml/HtmlToOpenXml.csproj
index bfffc341..917d8d34 100644
--- a/src/Html2OpenXml/HtmlToOpenXml.csproj
+++ b/src/Html2OpenXml/HtmlToOpenXml.csproj
@@ -9,13 +9,13 @@
HtmlToOpenXmlHtmlToOpenXmlHtmlToOpenXml.dll
- 3.2.2
+ 3.2.5icon.pngCopyright 2009-$([System.DateTime]::Now.Year) Olivier Nizet
- (Please write the package release notes in CHANGELOG.md)
+ See changelog https://github.com/onizet/html2openxml/blob/master/CHANGELOG.mdREADME.mdoffice openxml netcore html
- 3.2.2
+ 3.2.5MIThttps://github.com/onizet/html2openxmlhttps://github.com/onizet/html2openxml
@@ -44,7 +44,7 @@
-
+
@@ -64,15 +64,5 @@
true
-
-
-
-
-
-
-
- @(ReleaseNoteLines, '%0a')
-
-
\ No newline at end of file
diff --git a/src/Html2OpenXml/IO/DataUri.cs b/src/Html2OpenXml/IO/DataUri.cs
index 918783fa..394ffaf4 100755
--- a/src/Html2OpenXml/IO/DataUri.cs
+++ b/src/Html2OpenXml/IO/DataUri.cs
@@ -22,7 +22,7 @@ namespace HtmlToOpenXml.IO;
public sealed class DataUri
{
private readonly static Regex dataUriRegex = new Regex(
- @"data\:(?\w+/\w+)?(?:;charset=(?[a-zA-Z_0-9-]+))?(?;base64)?,(?.*)",
+ @"data\:(?\w+/[\w\-\+\.]+)?(?:;charset=(?[a-zA-Z_0-9-]+))?(?;base64)?,(?.*)",
RegexOptions.IgnoreCase | RegexOptions.Singleline);
private DataUri(string mime, byte[] data)
diff --git a/src/Html2OpenXml/IO/DefaultWebRequest.cs b/src/Html2OpenXml/IO/DefaultWebRequest.cs
index 9414a02b..9125c7db 100644
--- a/src/Html2OpenXml/IO/DefaultWebRequest.cs
+++ b/src/Html2OpenXml/IO/DefaultWebRequest.cs
@@ -126,7 +126,13 @@ public DefaultWebRequest(HttpClient httpClient, ILogger? logger = null)
resource.StatusCode = response.StatusCode;
if (response.IsSuccessStatusCode)
+ {
resource.Content = await response.Content.ReadAsStreamAsync().ConfigureAwait(false);
+ if (response.Content.Headers.TryGetValues("Content-Type", out var mime))
+ {
+ resource.Headers.Add("Content-Type", string.Join(", ", mime));
+ }
+ }
foreach (var header in response.Headers)
resource.Headers.Add(header.Key, string.Join(", ", header.Value));
diff --git a/src/Html2OpenXml/IO/ImageHeader.cs b/src/Html2OpenXml/IO/ImageHeader.cs
index 858a923d..94a1a518 100755
--- a/src/Html2OpenXml/IO/ImageHeader.cs
+++ b/src/Html2OpenXml/IO/ImageHeader.cs
@@ -58,12 +58,10 @@ public enum FileType { Unrecognized, Bitmap, Gif, Png, Jpeg, Emf, Xml }
/// Returns true if the detection was successful.
public static bool TryDetectFileType(Stream stream, out FileType type)
{
- using (SequentialBinaryReader reader = new SequentialBinaryReader(stream, leaveOpen: true))
- {
- type = DetectFileType(reader);
- stream.Seek(0L, SeekOrigin.Begin);
- return type != FileType.Unrecognized;
- }
+ using var reader = new SequentialBinaryReader(stream, leaveOpen: true);
+ type = DetectFileType(reader);
+ stream.Seek(0L, SeekOrigin.Begin);
+ return type != FileType.Unrecognized;
}
///
@@ -74,21 +72,19 @@ public static bool TryDetectFileType(Stream stream, out FileType type)
/// The image was of an unrecognised format.
public static Size GetDimensions(Stream stream)
{
- using (SequentialBinaryReader reader = new SequentialBinaryReader(stream, leaveOpen: true))
+ using var reader = new SequentialBinaryReader(stream, leaveOpen: true);
+ FileType type = DetectFileType(reader);
+ stream.Seek(0L, SeekOrigin.Begin);
+ return type switch
{
- FileType type = DetectFileType (reader);
- stream.Seek(0L, SeekOrigin.Begin);
- switch (type)
- {
- case FileType.Bitmap: return DecodeBitmap(reader);
- case FileType.Gif: return DecodeGif(reader);
- case FileType.Jpeg: return DecodeJfif(reader);
- case FileType.Png: return DecodePng(reader);
- case FileType.Emf: return DecodeEmf(reader);
- case FileType.Xml: return DecodeXml(stream);
- default: return Size.Empty;
- }
- }
+ FileType.Bitmap => DecodeBitmap(reader),
+ FileType.Gif => DecodeGif(reader),
+ FileType.Jpeg => DecodeJfif(reader),
+ FileType.Png => DecodePng(reader),
+ FileType.Emf => DecodeEmf(reader),
+ FileType.Xml => DecodeXml(stream),
+ _ => Size.Empty,
+ };
}
///
@@ -123,36 +119,25 @@ public static Size KeepAspectRatio(Size actualSize, Size preferredSize)
private static FileType DetectFileType (SequentialBinaryReader reader)
{
byte[] magicBytes = new byte[MaxMagicBytesLength];
- for (int i = 0; i < MaxMagicBytesLength; i += 1)
- {
- magicBytes[i] = reader.ReadByte();
- foreach (var kvPair in imageFormatDecoders)
- {
- if (StartsWith(magicBytes, kvPair.Key))
- {
- return kvPair.Value;
- }
- }
- }
+ var availableBytes = reader.BaseStream.Length - reader.BaseStream.Position;
+ // reasonably, we can assume that if fewer than MaxMagicBytesLength bytes remain in the stream,
+ // the image content must be invalid or truncated.
+ if (availableBytes < MaxMagicBytesLength)
+ return FileType.Unrecognized;
- return FileType.Unrecognized;
- }
+ reader.Read(magicBytes, 0, MaxMagicBytesLength);
- ///
- /// Determines whether the beginning of this byte array instance matches the specified byte array.
- ///
- /// Returns true if the first array starts with the bytes of the second array.
- private static bool StartsWith(byte[] thisBytes, byte[] thatBytes)
- {
- for (int i = 0; i < thatBytes.Length; i += 1)
+ var headerSpan = magicBytes.AsSpan();
+ foreach (var kvPair in imageFormatDecoders)
{
- if (thisBytes[i] != thatBytes[i])
+ // Determines whether the beginning of this array matches a known header.
+ if (headerSpan.StartsWith(kvPair.Key))
{
- return false;
+ return kvPair.Value;
}
}
- return true;
+ return FileType.Unrecognized;
}
private static Size DecodeBitmap(SequentialBinaryReader reader)
@@ -220,7 +205,7 @@ private static Size DecodeJfif(SequentialBinaryReader reader)
return Size.Empty;
// next 2-bytes are : [high-byte] [low-byte]
- var segmentLength = (int)reader.ReadUInt16();
+ int segmentLength = reader.ReadUInt16();
// segment length includes size bytes, so subtract two
segmentLength -= 2;
@@ -228,8 +213,8 @@ private static Size DecodeJfif(SequentialBinaryReader reader)
if (segmentType == 0xC0 || segmentType == 0xC2)
{
reader.ReadByte(); // bits/sample, usually 8
- int height = (int) reader.ReadUInt16();
- int width = (int) reader.ReadUInt16();
+ int height = reader.ReadUInt16();
+ int width = reader.ReadUInt16();
return new Size(width, height);
}
else
diff --git a/src/Html2OpenXml/IO/ImagePrefetcher.cs b/src/Html2OpenXml/IO/ImagePrefetcher.cs
index 9ff6dfc3..062ad113 100644
--- a/src/Html2OpenXml/IO/ImagePrefetcher.cs
+++ b/src/Html2OpenXml/IO/ImagePrefetcher.cs
@@ -1,259 +1,261 @@
-/* Copyright (C) Olivier Nizet https://github.com/onizet/html2openxml - All Rights Reserved
- *
- * This source is subject to the Microsoft Permissive License.
- * Please see the License.txt file for more information.
- * All other rights reserved.
- *
- * THIS CODE AND INFORMATION ARE PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
- * KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
- * PARTICULAR PURPOSE.
- */
-using System;
-using System.Collections.Generic;
-using System.IO;
-using System.Threading;
-using System.Threading.Tasks;
-using DocumentFormat.OpenXml.Packaging;
-
-namespace HtmlToOpenXml.IO;
-
-interface IImageLoader
-{
- ///
- /// Download the remote or local image located at the specified url.
- ///
- Task Download(string imageUri, CancellationToken cancellationToken);
-}
-
-///
-/// Download and provison the metadata of a requested image.
-///
-sealed class ImagePrefetcher : IImageLoader
- where T: OpenXmlPartContainer, ISupportedRelationship
-{
- // Map extension to PartTypeInfo
- private static readonly Dictionary knownExtensions = new(StringComparer.OrdinalIgnoreCase) {
- { ".gif", ImagePartType.Gif },
- { ".bmp", ImagePartType.Bmp },
- { ".emf", ImagePartType.Emf },
- { ".ico", ImagePartType.Icon },
- { ".jp2", ImagePartType.Jp2 },
- { ".jpeg", ImagePartType.Jpeg },
- { ".jpg", ImagePartType.Jpeg },
- { ".jpe", ImagePartType.Jpeg },
- { ".pcx", ImagePartType.Pcx },
- { ".png", ImagePartType.Png },
- { ".svg", ImagePartType.Svg },
- { ".tif", ImagePartType.Tif },
- { ".tiff", ImagePartType.Tiff },
- { ".wmf", ImagePartType.Wmf }
- };
- private readonly T hostingPart;
- private readonly IWebRequest resourceLoader;
- private readonly HtmlImageInfoCollection prefetchedImages;
-
-
- ///
- /// Constructor.
- ///
- /// The image will be linked to that hosting part.
- /// Images are not shared between header, footer and body.
- /// Service to resolve an image.
- public ImagePrefetcher(T hostingPart, IWebRequest resourceLoader)
- {
- this.hostingPart = hostingPart;
- this.resourceLoader = resourceLoader;
- this.prefetchedImages = new HtmlImageInfoCollection();
- }
-
- //____________________________________________________________________
- //
- // Public Functionality
-
- ///
- /// Download the remote or local image located at the specified url.
- ///
- public async Task Download(string imageUri, CancellationToken cancellationToken)
- {
- if (prefetchedImages.Contains(imageUri))
- return prefetchedImages[imageUri];
-
- HtmlImageInfo? iinfo;
- if (DataUri.IsWellFormed(imageUri)) // data inline, encoded in base64
- {
- iinfo = ReadDataUri(imageUri);
- }
- else
- {
- iinfo = await DownloadRemoteImage(imageUri, cancellationToken);
- }
-
- if (iinfo != null)
- prefetchedImages.Add(iinfo);
-
- return iinfo;
- }
-
- ///
- /// Download the image and try to find its format type.
- ///
- private async Task DownloadRemoteImage(string src, CancellationToken cancellationToken)
- {
- Uri imageUri = new Uri(src, UriKind.RelativeOrAbsolute);
- if (imageUri.IsAbsoluteUri && !resourceLoader.SupportsProtocol(imageUri.Scheme))
- return null;
-
- Resource? response;
-
- response = await resourceLoader.FetchAsync(imageUri, cancellationToken).ConfigureAwait(false);
- if (response?.Content == null)
- return null;
-
- using (response)
- {
- // For requested url with no filename, we need to read the media mime type if provided
- response.Headers.TryGetValue("Content-Type", out var mime);
- if (!TryInspectMimeType(mime, out PartTypeInfo type)
- && !TryGuessTypeFromUri(imageUri, out type)
- && !TryGuessTypeFromStream(response.Content, out type))
- {
- return null;
- }
-
- var ipart = hostingPart.AddImagePart(type);
- Size originalSize;
- using (var outputStream = ipart.GetStream(FileMode.Create))
- {
- response.Content.CopyTo(outputStream);
-
- outputStream.Seek(0L, SeekOrigin.Begin);
- originalSize = GetImageSize(outputStream);
- }
-
- return new HtmlImageInfo(src, hostingPart.GetIdOfPart(ipart)) {
- TypeInfo = type,
- Size = originalSize
- };
- }
- }
-
- ///
- /// Parse the Data inline image.
- ///
- private HtmlImageInfo? ReadDataUri(string src)
- {
- if (DataUri.TryCreate(src, out var dataUri))
- {
- Size originalSize;
- knownContentType.TryGetValue(dataUri!.Mime, out PartTypeInfo type);
- var ipart = hostingPart.AddImagePart(type);
- using (var outputStream = ipart.GetStream(FileMode.Create))
- {
- outputStream.Write(dataUri.Data, 0, dataUri.Data.Length);
-
- outputStream.Seek(0L, SeekOrigin.Begin);
- originalSize = GetImageSize(outputStream);
- }
-
- return new HtmlImageInfo(src, hostingPart.GetIdOfPart(ipart)) {
- TypeInfo = type,
- Size = originalSize
- };
- }
-
- return null;
- }
-
- //____________________________________________________________________
- //
- // Private Implementation
-
- // http://stackoverflow.com/questions/58510/using-net-how-can-you-find-the-mime-type-of-a-file-based-on-the-file-signature
- private static readonly Dictionary knownContentType = new(StringComparer.OrdinalIgnoreCase) {
- { "image/gif", ImagePartType.Gif },
- { "image/pjpeg", ImagePartType.Jpeg },
- { "image/jp2", ImagePartType.Jp2 },
- { "image/jpg", ImagePartType.Jpeg },
- { "image/jpeg", ImagePartType.Jpeg },
- { "image/x-png", ImagePartType.Png },
- { "image/png", ImagePartType.Png },
- { "image/tiff", ImagePartType.Tiff },
- { "image/vnd.microsoft.icon", ImagePartType.Icon },
- // these icons mime type are wrong but we should nevertheless take care (http://en.wikipedia.org/wiki/ICO_%28file_format%29#MIME_type)
- { "image/x-icon", ImagePartType.Icon },
- { "image/icon", ImagePartType.Icon },
- { "image/ico", ImagePartType.Icon },
- { "text/ico", ImagePartType.Icon },
- { "text/application-ico", ImagePartType.Icon },
- { "image/bmp", ImagePartType.Bmp },
- { "image/svg+xml", ImagePartType.Svg },
- };
-
- ///
- /// Inspect the response headers of a web request and decode the mime type if provided
- ///
- /// Returns the extension of the image if provideds.
- private static bool TryInspectMimeType(string? contentType, out PartTypeInfo type)
- {
- // can be null when the protocol used doesn't allow response headers
- if (contentType != null &&
- knownContentType.TryGetValue(contentType, out type))
- return true;
-
- type = default;
- return false;
- }
-
- ///
- /// Gets the OpenXml PartTypeInfo associated to an image.
- ///
- private static bool TryGuessTypeFromUri(Uri uri, out PartTypeInfo type)
- {
- string extension = Path.GetExtension(uri.IsAbsoluteUri ? uri.Segments[uri.Segments.Length - 1] : uri.OriginalString);
- if (knownExtensions.TryGetValue(extension, out type)) return true;
-
- // extension not recognized, try with checking the query string. Expecting to resolve something like:
- // ./image.axd?picture=img1.jpg
- extension = Path.GetExtension(uri.IsAbsoluteUri ? uri.AbsoluteUri : uri.ToString());
- if (knownExtensions.TryGetValue(extension, out type)) return true;
-
- return false;
- }
-
- ///
- /// Gets the OpenXml PartTypeInfo associated to an image.
- ///
- private static bool TryGuessTypeFromStream(Stream stream, out PartTypeInfo type)
- {
- if (ImageHeader.TryDetectFileType(stream, out ImageHeader.FileType guessType))
- {
- switch (guessType)
- {
- case ImageHeader.FileType.Bitmap: type = ImagePartType.Bmp; return true;
- case ImageHeader.FileType.Emf: type = ImagePartType.Emf; return true;
- case ImageHeader.FileType.Gif: type = ImagePartType.Gif; return true;
- case ImageHeader.FileType.Jpeg: type = ImagePartType.Jpeg; return true;
- case ImageHeader.FileType.Png: type = ImagePartType.Png; return true;
- }
- }
- type = ImagePartType.Bmp;
- return false;
- }
-
- ///
- /// Loads an image from a stream and grab its size.
- ///
- private static Size GetImageSize(Stream imageStream)
- {
- // Read only the size of the image
- try
- {
- return ImageHeader.GetDimensions(imageStream);
- }
- catch (ArgumentException)
- {
- return Size.Empty;
- }
- }
-}
+/* Copyright (C) Olivier Nizet https://github.com/onizet/html2openxml - All Rights Reserved
+ *
+ * This source is subject to the Microsoft Permissive License.
+ * Please see the License.txt file for more information.
+ * All other rights reserved.
+ *
+ * THIS CODE AND INFORMATION ARE PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
+ * KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
+ * PARTICULAR PURPOSE.
+ */
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Threading;
+using System.Threading.Tasks;
+using DocumentFormat.OpenXml.Packaging;
+
+namespace HtmlToOpenXml.IO;
+
+interface IImageLoader
+{
+ ///
+ /// Download the remote or local image located at the specified url.
+ ///
+ Task Download(string imageUri, CancellationToken cancellationToken);
+}
+
+///
+/// Download and provision the metadata of a requested image.
+///
+sealed class ImagePrefetcher : IImageLoader
+ where T: OpenXmlPartContainer, ISupportedRelationship
+{
+ // Map extension to PartTypeInfo
+ private static readonly Dictionary knownExtensions = new(StringComparer.OrdinalIgnoreCase) {
+ { ".gif", ImagePartType.Gif },
+ { ".bmp", ImagePartType.Bmp },
+ { ".emf", ImagePartType.Emf },
+ { ".ico", ImagePartType.Icon },
+ { ".jp2", ImagePartType.Jp2 },
+ { ".jpeg", ImagePartType.Jpeg },
+ { ".jpg", ImagePartType.Jpeg },
+ { ".jpe", ImagePartType.Jpeg },
+ { ".pcx", ImagePartType.Pcx },
+ { ".png", ImagePartType.Png },
+ { ".svg", ImagePartType.Svg },
+ { ".tif", ImagePartType.Tif },
+ { ".tiff", ImagePartType.Tiff },
+ { ".wmf", ImagePartType.Wmf }
+ };
+ private readonly T hostingPart;
+ private readonly IWebRequest resourceLoader;
+ private readonly HtmlImageInfoCollection prefetchedImages;
+
+
+ ///
+ /// Constructor.
+ ///
+ /// The image will be linked to that hosting part.
+ /// Images are not shared between header, footer and body.
+ /// Service to resolve an image.
+ public ImagePrefetcher(T hostingPart, IWebRequest resourceLoader)
+ {
+ this.hostingPart = hostingPart;
+ this.resourceLoader = resourceLoader;
+ this.prefetchedImages = new HtmlImageInfoCollection();
+ }
+
+ //____________________________________________________________________
+ //
+ // Public Functionality
+
+ ///
+ /// Download the remote or local image located at the specified url.
+ ///
+ public async Task Download(string imageUri, CancellationToken cancellationToken)
+ {
+ if (prefetchedImages.Contains(imageUri))
+ return prefetchedImages[imageUri];
+
+ HtmlImageInfo? iinfo;
+ if (DataUri.IsWellFormed(imageUri)) // data inline, encoded in base64
+ {
+ iinfo = ReadDataUri(imageUri);
+ }
+ else
+ {
+ iinfo = await DownloadRemoteImage(imageUri, cancellationToken).ConfigureAwait(false);
+ }
+
+ if (iinfo != null)
+ prefetchedImages.Add(iinfo);
+
+ return iinfo;
+ }
+
+ ///
+ /// Download the image and try to find its format type.
+ ///
+ private async Task DownloadRemoteImage(string src, CancellationToken cancellationToken)
+ {
+ Uri imageUri = new Uri(src, UriKind.RelativeOrAbsolute);
+ if (imageUri.IsAbsoluteUri && !resourceLoader.SupportsProtocol(imageUri.Scheme))
+ return null;
+
+ Resource? response;
+
+ response = await resourceLoader.FetchAsync(imageUri, cancellationToken).ConfigureAwait(false);
+ if (response?.Content == null)
+ return null;
+
+ using (response)
+ {
+ // For requested url with no filename, we need to read the media mime type if provided
+ response.Headers.TryGetValue("Content-Type", out var mime);
+ if (!TryInspectMimeType(mime, out PartTypeInfo type)
+ && !TryGuessTypeFromUri(imageUri, out type)
+ && !TryGuessTypeFromStream(response.Content, out type))
+ {
+ return null;
+ }
+
+ var ipart = hostingPart.AddImagePart(type);
+ Size originalSize;
+ using (var outputStream = ipart.GetStream(FileMode.Create))
+ {
+ response.Content.CopyTo(outputStream);
+
+ outputStream.Seek(0L, SeekOrigin.Begin);
+ originalSize = GetImageSize(outputStream);
+ }
+
+ return new HtmlImageInfo(src, hostingPart.GetIdOfPart(ipart)) {
+ TypeInfo = type,
+ Size = originalSize
+ };
+ }
+ }
+
+ ///
+ /// Parse the Data inline image.
+ ///
+ private HtmlImageInfo? ReadDataUri(string src)
+ {
+ if (DataUri.TryCreate(src, out var dataUri))
+ {
+ Size originalSize;
+ knownContentType.TryGetValue(dataUri!.Mime, out PartTypeInfo type);
+ var ipart = hostingPart.AddImagePart(type);
+ using (var outputStream = ipart.GetStream(FileMode.Create))
+ {
+ outputStream.Write(dataUri.Data, 0, dataUri.Data.Length);
+
+ outputStream.Seek(0L, SeekOrigin.Begin);
+ originalSize = GetImageSize(outputStream);
+ }
+
+ return new HtmlImageInfo(src, hostingPart.GetIdOfPart(ipart)) {
+ TypeInfo = type,
+ Size = originalSize
+ };
+ }
+
+ return null;
+ }
+
+ //____________________________________________________________________
+ //
+ // Private Implementation
+
+ // http://stackoverflow.com/questions/58510/using-net-how-can-you-find-the-mime-type-of-a-file-based-on-the-file-signature
+ private static readonly Dictionary knownContentType = new(StringComparer.OrdinalIgnoreCase) {
+ { "image/gif", ImagePartType.Gif },
+ { "image/pjpeg", ImagePartType.Jpeg },
+ { "image/jp2", ImagePartType.Jp2 },
+ { "image/jpg", ImagePartType.Jpeg },
+ { "image/jpeg", ImagePartType.Jpeg },
+ { "image/x-png", ImagePartType.Png },
+ { "image/png", ImagePartType.Png },
+ { "image/tiff", ImagePartType.Tiff },
+ { "image/emf", ImagePartType.Emf },
+ { "image/x-emf", ImagePartType.Emf },
+ { "image/vnd.microsoft.icon", ImagePartType.Icon },
+ // these icons mime type are wrong but we should nevertheless take care (http://en.wikipedia.org/wiki/ICO_%28file_format%29#MIME_type)
+ { "image/x-icon", ImagePartType.Icon },
+ { "image/icon", ImagePartType.Icon },
+ { "image/ico", ImagePartType.Icon },
+ { "text/ico", ImagePartType.Icon },
+ { "text/application-ico", ImagePartType.Icon },
+ { "image/bmp", ImagePartType.Bmp },
+ { "image/svg+xml", ImagePartType.Svg },
+ };
+
+ ///
+ /// Inspect the response headers of a web request and decode the mime type if provided
+ ///
+ /// Returns the extension of the image if provided.
+ private static bool TryInspectMimeType(string? contentType, out PartTypeInfo type)
+ {
+ // can be null when the protocol used doesn't allow response headers
+ if (contentType != null &&
+ knownContentType.TryGetValue(contentType, out type))
+ return true;
+
+ type = default;
+ return false;
+ }
+
+ ///
+ /// Gets the OpenXml PartTypeInfo associated to an image.
+ ///
+ private static bool TryGuessTypeFromUri(Uri uri, out PartTypeInfo type)
+ {
+ string extension = Path.GetExtension(uri.IsAbsoluteUri ? uri.Segments[uri.Segments.Length - 1] : uri.OriginalString);
+ if (knownExtensions.TryGetValue(extension, out type)) return true;
+
+ // extension not recognized, try with checking the query string. Expecting to resolve something like:
+ // ./image.axd?picture=img1.jpg
+ extension = Path.GetExtension(uri.IsAbsoluteUri ? uri.AbsoluteUri : uri.ToString());
+ if (knownExtensions.TryGetValue(extension, out type)) return true;
+
+ return false;
+ }
+
+ ///
+ /// Gets the OpenXml PartTypeInfo associated to an image.
+ ///
+ private static bool TryGuessTypeFromStream(Stream stream, out PartTypeInfo type)
+ {
+ if (ImageHeader.TryDetectFileType(stream, out ImageHeader.FileType guessType))
+ {
+ switch (guessType)
+ {
+ case ImageHeader.FileType.Bitmap: type = ImagePartType.Bmp; return true;
+ case ImageHeader.FileType.Emf: type = ImagePartType.Emf; return true;
+ case ImageHeader.FileType.Gif: type = ImagePartType.Gif; return true;
+ case ImageHeader.FileType.Jpeg: type = ImagePartType.Jpeg; return true;
+ case ImageHeader.FileType.Png: type = ImagePartType.Png; return true;
+ }
+ }
+ type = ImagePartType.Bmp;
+ return false;
+ }
+
+ ///
+ /// Loads an image from a stream and grab its size.
+ ///
+ private static Size GetImageSize(Stream imageStream)
+ {
+ // Read only the size of the image
+ try
+ {
+ return ImageHeader.GetDimensions(imageStream);
+ }
+ catch (ArgumentException)
+ {
+ return Size.Empty;
+ }
+ }
+}
diff --git a/src/Html2OpenXml/Primitives/HtmlColor.Named.cs b/src/Html2OpenXml/Primitives/HtmlColor.Named.cs
new file mode 100644
index 00000000..37a42fb9
--- /dev/null
+++ b/src/Html2OpenXml/Primitives/HtmlColor.Named.cs
@@ -0,0 +1,194 @@
+using System;
+#if NET8_0_OR_GREATER
+using System.Collections.Frozen;
+#endif
+using System.Collections.Generic;
+
+namespace HtmlToOpenXml;
+
+/// <summary>
+/// Helper class to translate a named color to its ARGB representation.
+/// </summary>
+partial struct HtmlColor
+{
+    /// <summary>Lookup of the supported color names; every key is registered in lower case.</summary>
+    private static readonly IReadOnlyDictionary<string, HtmlColor> namedColors = InitKnownColors();
+
+    /// <summary>
+    /// Resolves a color name, whatever its casing, to its ARGB value.
+    /// </summary>
+    /// <param name="name">The color name, e.g. <c>LightPink</c>.</param>
+    /// <returns>The matching color, or the default <see cref="HtmlColor"/> value when the name is unknown.</returns>
+    private static HtmlColor GetNamedColor (ReadOnlySpan<char> name)
+    {
+        // The longest built-in name is far shorter than this threshold, so the scratch buffer normally lives
+        // on the stack. The key handed to the dictionary below is still a freshly allocated string.
+        Span<char> loweredValue = name.Length <= 128 ? stackalloc char[name.Length] : new char[name.Length];
+
+        // the keys are registered in lower case, which makes the lookup case-insensitive
+        name.ToLowerInvariant(loweredValue);
+
+        namedColors.TryGetValue(loweredValue.ToString(), out var color);
+        return color;
+    }
+
+    /// <summary>
+    /// Builds the table of the named colors, with the RGB values of the CSS named colors.
+    /// </summary>
+    private static IReadOnlyDictionary<string, HtmlColor> InitKnownColors()
+    {
+        // keys MUST be lower case: GetNamedColor lowers its input before the (case-sensitive) lookup
+        var colors = new Dictionary<string, HtmlColor>()
+        {
+            { "black", Black },
+            { "white", FromArgb(255, 255, 255) },
+            { "aliceblue", FromArgb(240, 248, 255) },
+            { "lightsalmon", FromArgb(255, 160, 122) },
+            { "antiquewhite", FromArgb(250, 235, 215) },
+            { "lightseagreen", FromArgb(32, 178, 170) },
+            { "aqua", FromArgb(0, 255, 255) },
+            { "lightskyblue", FromArgb(135, 206, 250) },
+            { "aquamarine", FromArgb(127, 255, 212) },
+            { "lightslategray", FromArgb(119, 136, 153) },
+            { "azure", FromArgb(240, 255, 255) },
+            { "lightsteelblue", FromArgb(176, 196, 222) },
+            { "beige", FromArgb(245, 245, 220) },
+            { "lightyellow", FromArgb(255, 255, 224) },
+            { "bisque", FromArgb(255, 228, 196) },
+            { "lime", FromArgb(0, 255, 0) },
+            { "limegreen", FromArgb(50, 205, 50) },
+            { "blanchedalmond", FromArgb(255, 235, 205) },
+            { "linen", FromArgb(250, 240, 230) },
+            { "blue", FromArgb(0, 0, 255) },
+            { "magenta", FromArgb(255, 0, 255) },
+            { "blueviolet", FromArgb(138, 43, 226) },
+            { "maroon", FromArgb(128, 0, 0) },
+            { "brown", FromArgb(165, 42, 42) },
+            { "mediumaquamarine", FromArgb(102, 205, 170) },
+            { "burlywood", FromArgb(222, 184, 135) },
+            { "mediumblue", FromArgb(0, 0, 205) },
+            { "cadetblue", FromArgb(95, 158, 160) },
+            { "mediumorchid", FromArgb(186, 85, 211) },
+            { "chartreuse", FromArgb(127, 255, 0) },
+            { "mediumpurple", FromArgb(147, 112, 219) },
+            { "chocolate", FromArgb(210, 105, 30) },
+            { "mediumseagreen", FromArgb(60, 179, 113) },
+            { "coral", FromArgb(255, 127, 80) },
+            { "mediumslateblue", FromArgb(123, 104, 238) },
+            { "cornflowerblue", FromArgb(100, 149, 237) },
+            { "mediumspringgreen", FromArgb(0, 250, 154) },
+            { "cornsilk", FromArgb(255, 248, 220) },
+            { "mediumturquoise", FromArgb(72, 209, 204) },
+            { "crimson", FromArgb(220, 20, 60) },
+            { "mediumvioletred", FromArgb(199, 21, 133) },
+            { "cyan", FromArgb(0, 255, 255) },
+            { "midnightblue", FromArgb(25, 25, 112) },
+            { "darkblue", FromArgb(0, 0, 139) },
+            { "mintcream", FromArgb(245, 255, 250) },
+            { "darkcyan", FromArgb(0, 139, 139) },
+            { "mistyrose", FromArgb(255, 228, 225) },
+            { "darkgoldenrod", FromArgb(184, 134, 11) },
+            { "moccasin", FromArgb(255, 228, 181) },
+            { "darkgray", FromArgb(169, 169, 169) },
+            { "navajowhite", FromArgb(255, 222, 173) },
+            { "darkgreen", FromArgb(0, 100, 0) },
+            { "navy", FromArgb(0, 0, 128) },
+            { "darkkhaki", FromArgb(189, 183, 107) },
+            { "oldlace", FromArgb(253, 245, 230) },
+            { "darkmagenta", FromArgb(139, 0, 139) },
+            { "olive", FromArgb(128, 128, 0) },
+            { "darkolivegreen", FromArgb(85, 107, 47) },
+            { "olivedrab", FromArgb(107, 142, 35) },
+            { "darkorange", FromArgb(255, 140, 0) },
+            { "orange", FromArgb(255, 165, 0) },
+            { "darkorchid", FromArgb(153, 50, 204) },
+            { "orangered", FromArgb(255, 69, 0) },
+            { "darkred", FromArgb(139, 0, 0) },
+            { "orchid", FromArgb(218, 112, 214) },
+            { "darksalmon", FromArgb(233, 150, 122) },
+            { "palegoldenrod", FromArgb(238, 232, 170) },
+            { "darkseagreen", FromArgb(143, 188, 143) },
+            { "palegreen", FromArgb(152, 251, 152) },
+            { "darkslateblue", FromArgb(72, 61, 139) },
+            { "paleturquoise", FromArgb(175, 238, 238) },
+            { "darkslategray", FromArgb(47, 79, 79) },
+            { "palevioletred", FromArgb(219, 112, 147) },
+            { "darkturquoise", FromArgb(0, 206, 209) },
+            { "papayawhip", FromArgb(255, 239, 213) },
+            { "darkviolet", FromArgb(148, 0, 211) },
+            { "peachpuff", FromArgb(255, 218, 185) },
+            { "deeppink", FromArgb(255, 20, 147) },
+            { "peru", FromArgb(205, 133, 63) },
+            { "deepskyblue", FromArgb(0, 191, 255) },
+            { "pink", FromArgb(255, 192, 203) },
+            { "dimgray", FromArgb(105, 105, 105) },
+            { "plum", FromArgb(221, 160, 221) },
+            { "dodgerblue", FromArgb(30, 144, 255) },
+            { "powderblue", FromArgb(176, 224, 230) },
+            { "firebrick", FromArgb(178, 34, 34) },
+            { "purple", FromArgb(128, 0, 128) },
+            { "floralwhite", FromArgb(255, 250, 240) },
+            { "red", FromArgb(255, 0, 0) },
+            { "forestgreen", FromArgb(34, 139, 34) },
+            { "rosybrown", FromArgb(188, 143, 143) },
+            { "fuschia", FromArgb(255, 0, 255) }, // historical misspelling, kept so that existing input still resolves
+            { "fuchsia", FromArgb(255, 0, 255) },
+            { "royalblue", FromArgb(65, 105, 225) },
+            { "gainsboro", FromArgb(220, 220, 220) },
+            { "saddlebrown", FromArgb(139, 69, 19) },
+            { "ghostwhite", FromArgb(248, 248, 255) },
+            { "salmon", FromArgb(250, 128, 114) },
+            { "gold", FromArgb(255, 215, 0) },
+            { "sandybrown", FromArgb(244, 164, 96) },
+            { "goldenrod", FromArgb(218, 165, 32) },
+            { "seagreen", FromArgb(46, 139, 87) },
+            { "gray", FromArgb(128, 128, 128) },
+            { "seashell", FromArgb(255, 245, 238) },
+            { "green", FromArgb(0, 128, 0) },
+            { "sienna", FromArgb(160, 82, 45) },
+            { "greenyellow", FromArgb(173, 255, 47) },
+            { "silver", FromArgb(192, 192, 192) },
+            { "honeydew", FromArgb(240, 255, 240) },
+            { "skyblue", FromArgb(135, 206, 235) },
+            { "hotpink", FromArgb(255, 105, 180) },
+            { "slateblue", FromArgb(106, 90, 205) },
+            { "indianred", FromArgb(205, 92, 92) },
+            { "slategray", FromArgb(112, 128, 144) },
+            { "indigo", FromArgb(75, 0, 130) },
+            { "snow", FromArgb(255, 250, 250) },
+            { "ivory", FromArgb(255, 255, 240) },
+            { "springgreen", FromArgb(0, 255, 127) },
+            { "khaki", FromArgb(240, 230, 140) },
+            { "steelblue", FromArgb(70, 130, 180) },
+            { "lavender", FromArgb(230, 230, 250) },
+            { "tan", FromArgb(210, 180, 140) },
+            { "lavenderblush", FromArgb(255, 240, 245) },
+            { "teal", FromArgb(0, 128, 128) },
+            { "lawngreen", FromArgb(124, 252, 0) },
+            { "thistle", FromArgb(216, 191, 216) },
+            { "lemonchiffon", FromArgb(255, 250, 205) },
+            { "tomato", FromArgb(255, 99, 71) },
+            { "lightblue", FromArgb(173, 216, 230) },
+            { "turquoise", FromArgb(64, 224, 208) },
+            { "lightcoral", FromArgb(240, 128, 128) },
+            { "violet", FromArgb(238, 130, 238) },
+            { "lightcyan", FromArgb(224, 255, 255) },
+            { "wheat", FromArgb(245, 222, 179) },
+            { "lightgoldenrodyellow", FromArgb(250, 250, 210) },
+            { "lightgreen", FromArgb(144, 238, 144) },
+            { "whitesmoke", FromArgb(245, 245, 245) },
+            { "lightgray", FromArgb(211, 211, 211) },
+            { "yellow", FromArgb(255, 255, 0) },
+            { "lightpink", FromArgb(255, 182, 193) },
+            { "yellowgreen", FromArgb(154, 205, 50) },
+            { "transparent", FromArgb(0, 0, 0, 0) }
+        };
+
+#if NET8_0_OR_GREATER
+        // System.Collections.Frozen is only available starting with .NET 8
+        return colors.ToFrozenDictionary();
+#else
+        return colors;
+#endif
+    }
+}
\ No newline at end of file
diff --git a/src/Html2OpenXml/Primitives/HtmlColor.cs b/src/Html2OpenXml/Primitives/HtmlColor.cs
index 799063a6..2879aede 100755
--- a/src/Html2OpenXml/Primitives/HtmlColor.cs
+++ b/src/Html2OpenXml/Primitives/HtmlColor.cs
@@ -17,7 +17,7 @@ namespace HtmlToOpenXml;
///
/// Represents an ARGB color.
///
-readonly struct HtmlColor : IEquatable
+readonly partial struct HtmlColor : IEquatable
{
private static readonly char[] hexDigits = {
'0', '1', '2', '3', '4', '5', '6', '7',
@@ -115,7 +115,7 @@ public static HtmlColor Parse(string? htmlColor)
throw;
}
- return HtmlColorTranslator.FromHtml(htmlColor);
+ return GetNamedColor(htmlColor.AsSpan());
}
///
diff --git a/src/Html2OpenXml/Utilities/CollectionExtensions.cs b/src/Html2OpenXml/Utilities/CollectionExtensions.cs
index b66fb959..42aa8d2a 100644
--- a/src/Html2OpenXml/Utilities/CollectionExtensions.cs
+++ b/src/Html2OpenXml/Utilities/CollectionExtensions.cs
@@ -38,7 +38,7 @@ public static Task ForEachAsync(this IEnumerable source,
var throttler = new SemaphoreSlim(initialCount: Math.Max(1, parallelOptions.MaxDegreeOfParallelism));
var tasks = System.Linq.Enumerable.Select(source, async item =>
{
- await throttler.WaitAsync(parallelOptions.CancellationToken);
+ await throttler.WaitAsync(parallelOptions.CancellationToken).ConfigureAwait(false);
if (parallelOptions.CancellationToken.IsCancellationRequested) return;
try
diff --git a/src/Html2OpenXml/Utilities/HtmlColorTranslator.cs b/src/Html2OpenXml/Utilities/HtmlColorTranslator.cs
deleted file mode 100755
index ff59cb92..00000000
--- a/src/Html2OpenXml/Utilities/HtmlColorTranslator.cs
+++ /dev/null
@@ -1,168 +0,0 @@
-using System;
-using System.Collections.Generic;
-
-namespace HtmlToOpenXml;
-
-///
-/// Helper class to translate a named color to its ARGB representation.
-///
-static class HtmlColorTranslator
-{
- private static readonly Dictionary namedColors = InitKnownColors();
-
- public static HtmlColor FromHtml (string htmlColor)
- {
- namedColors.TryGetValue(htmlColor, out var color);
- return color;
- }
-
- private static Dictionary InitKnownColors()
- {
- var colors = new Dictionary(StringComparer.OrdinalIgnoreCase)
- {
- { "Black", HtmlColor.Black },
- { "White", HtmlColor.FromArgb(255,255,255) },
- { "AliceBlue", HtmlColor.FromArgb(240, 248, 255) },
- { "LightSalmon", HtmlColor.FromArgb(255, 160, 122) },
- { "AntiqueWhite", HtmlColor.FromArgb(250, 235, 215) },
- { "LightSeaGreen", HtmlColor.FromArgb(32, 178, 170) },
- { "Aqua", HtmlColor.FromArgb(0, 255, 255) },
- { "LightSkyBlue", HtmlColor.FromArgb(135, 206, 250) },
- { "Aquamarine", HtmlColor.FromArgb(127, 255, 212) },
- { "LightSlateGray", HtmlColor.FromArgb(119, 136, 153) },
- { "Azure", HtmlColor.FromArgb(240, 255, 255) },
- { "LightSteelBlue", HtmlColor.FromArgb(176, 196, 222) },
- { "Beige", HtmlColor.FromArgb(245, 245, 220) },
- { "LightYellow", HtmlColor.FromArgb(255, 255, 224) },
- { "Bisque", HtmlColor.FromArgb(255, 228, 196) },
- { "Lime", HtmlColor.FromArgb(0, 255, 0) },
- { "LimeGreen", HtmlColor.FromArgb(50, 205, 50) },
- { "BlanchedAlmond", HtmlColor.FromArgb(255, 255, 205) },
- { "Linen", HtmlColor.FromArgb(250, 240, 230) },
- { "Blue", HtmlColor.FromArgb(0, 0, 255) },
- { "Magenta", HtmlColor.FromArgb(255, 0, 255) },
- { "BlueViolet", HtmlColor.FromArgb(138, 43, 226) },
- { "Maroon", HtmlColor.FromArgb(128, 0, 0) },
- { "Brown", HtmlColor.FromArgb(165, 42, 42) },
- { "MediumAquamarine", HtmlColor.FromArgb(102, 205, 170) },
- { "BurlyWood", HtmlColor.FromArgb(222, 184, 135) },
- { "MediumBlue", HtmlColor.FromArgb(0, 0, 205) },
- { "CadetBlue", HtmlColor.FromArgb(95, 158, 160) },
- { "MediumOrchid", HtmlColor.FromArgb(186, 85, 211) },
- { "Chartreuse", HtmlColor.FromArgb(127, 255, 0) },
- { "MediumPurple", HtmlColor.FromArgb(147, 112, 219) },
- { "Chocolate", HtmlColor.FromArgb(210, 105, 30) },
- { "MediumSeaGreen", HtmlColor.FromArgb(60, 179, 113) },
- { "Coral", HtmlColor.FromArgb(255, 127, 80) },
- { "MediumSlateBlue", HtmlColor.FromArgb(123, 104, 238) },
- { "CornflowerBlue", HtmlColor.FromArgb(100, 149, 237) },
- { "MediumSpringGreen", HtmlColor.FromArgb(0, 250, 154) },
- { "Cornsilk", HtmlColor.FromArgb(255, 248, 220) },
- { "MediumTurquoise", HtmlColor.FromArgb(72, 209, 204) },
- { "Crimson", HtmlColor.FromArgb(220, 20, 60) },
- { "MediumVioletRed", HtmlColor.FromArgb(199, 21, 112) },
- { "Cyan", HtmlColor.FromArgb(0, 255, 255) },
- { "MidnightBlue", HtmlColor.FromArgb(25, 25, 112) },
- { "DarkBlue", HtmlColor.FromArgb(0, 0, 139) },
- { "MintCream", HtmlColor.FromArgb(245, 255, 250) },
- { "DarkCyan", HtmlColor.FromArgb(0, 139, 139) },
- { "MistyRose", HtmlColor.FromArgb(255, 228, 225) },
- { "DarkGoldenrod", HtmlColor.FromArgb(184, 134, 11) },
- { "Moccasin", HtmlColor.FromArgb(255, 228, 181) },
- { "DarkGray", HtmlColor.FromArgb(169, 169, 169) },
- { "NavajoWhite", HtmlColor.FromArgb(255, 222, 173) },
- { "DarkGreen", HtmlColor.FromArgb(0, 100, 0) },
- { "Navy", HtmlColor.FromArgb(0, 0, 128) },
- { "DarkKhaki", HtmlColor.FromArgb(189, 183, 107) },
- { "OldLace", HtmlColor.FromArgb(253, 245, 230) },
- { "DarkMagenta", HtmlColor.FromArgb(139, 0, 139) },
- { "Olive", HtmlColor.FromArgb(128, 128, 0) },
- { "DarkOliveGreen", HtmlColor.FromArgb(85, 107, 47) },
- { "OliveDrab", HtmlColor.FromArgb(107, 142, 45) },
- { "DarkOrange", HtmlColor.FromArgb(255, 140, 0) },
- { "Orange", HtmlColor.FromArgb(255, 165, 0) },
- { "DarkOrchid", HtmlColor.FromArgb(153, 50, 204) },
- { "OrangeRed", HtmlColor.FromArgb(255, 69, 0) },
- { "DarkRed", HtmlColor.FromArgb(139, 0, 0) },
- { "Orchid", HtmlColor.FromArgb(218, 112, 214) },
- { "DarkSalmon", HtmlColor.FromArgb(233, 150, 122) },
- { "PaleGoldenrod", HtmlColor.FromArgb(238, 232, 170) },
- { "DarkSeaGreen", HtmlColor.FromArgb(143, 188, 143) },
- { "PaleGreen", HtmlColor.FromArgb(152, 251, 152) },
- { "DarkSlateBlue", HtmlColor.FromArgb(72, 61, 139) },
- { "PaleTurquoise", HtmlColor.FromArgb(175, 238, 238) },
- { "DarkSlateGray", HtmlColor.FromArgb(40, 79, 79) },
- { "PaleVioletRed", HtmlColor.FromArgb(219, 112, 147) },
- { "DarkTurquoise", HtmlColor.FromArgb(0, 206, 209) },
- { "PapayaWhip", HtmlColor.FromArgb(255, 239, 213) },
- { "DarkViolet", HtmlColor.FromArgb(148, 0, 211) },
- { "PeachPuff", HtmlColor.FromArgb(255, 218, 155) },
- { "DeepPink", HtmlColor.FromArgb(255, 20, 147) },
- { "Peru", HtmlColor.FromArgb(205, 133, 63) },
- { "DeepSkyBlue", HtmlColor.FromArgb(0, 191, 255) },
- { "Pink", HtmlColor.FromArgb(255, 192, 203) },
- { "DimGray", HtmlColor.FromArgb(105, 105, 105) },
- { "Plum", HtmlColor.FromArgb(221, 160, 221) },
- { "DodgerBlue", HtmlColor.FromArgb(30, 144, 255) },
- { "PowderBlue", HtmlColor.FromArgb(176, 224, 230) },
- { "Firebrick", HtmlColor.FromArgb(178, 34, 34) },
- { "Purple", HtmlColor.FromArgb(128, 0, 128) },
- { "FloralWhite", HtmlColor.FromArgb(255, 250, 240) },
- { "Red", HtmlColor.FromArgb(255, 0, 0) },
- { "ForestGreen", HtmlColor.FromArgb(34, 139, 34) },
- { "RosyBrown", HtmlColor.FromArgb(188, 143, 143) },
- { "Fuschia", HtmlColor.FromArgb(255, 0, 255) },
- { "RoyalBlue", HtmlColor.FromArgb(65, 105, 225) },
- { "Gainsboro", HtmlColor.FromArgb(220, 220, 220) },
- { "SaddleBrown", HtmlColor.FromArgb(139, 69, 19) },
- { "GhostWhite", HtmlColor.FromArgb(248, 248, 255) },
- { "Salmon", HtmlColor.FromArgb(250, 128, 114) },
- { "Gold", HtmlColor.FromArgb(255, 215, 0) },
- { "SandyBrown", HtmlColor.FromArgb(244, 164, 96) },
- { "Goldenrod", HtmlColor.FromArgb(218, 165, 32) },
- { "SeaGreen", HtmlColor.FromArgb(46, 139, 87) },
- { "Gray", HtmlColor.FromArgb(128, 128, 128) },
- { "Seashell", HtmlColor.FromArgb(255, 245, 238) },
- { "Green", HtmlColor.FromArgb(0, 128, 0) },
- { "Sienna", HtmlColor.FromArgb(160, 82, 45) },
- { "GreenYellow", HtmlColor.FromArgb(173, 255, 47) },
- { "Silver", HtmlColor.FromArgb(192, 192, 192) },
- { "Honeydew", HtmlColor.FromArgb(240, 255, 240) },
- { "SkyBlue", HtmlColor.FromArgb(135, 206, 235) },
- { "HotPink", HtmlColor.FromArgb(255, 105, 180) },
- { "SlateBlue", HtmlColor.FromArgb(106, 90, 205) },
- { "IndianRed", HtmlColor.FromArgb(205, 92, 92) },
- { "SlateGray", HtmlColor.FromArgb(112, 128, 144) },
- { "Indigo", HtmlColor.FromArgb(75, 0, 130) },
- { "Snow", HtmlColor.FromArgb(255, 250, 250) },
- { "Ivory", HtmlColor.FromArgb(255, 240, 240) },
- { "SpringGreen", HtmlColor.FromArgb(0, 255, 127) },
- { "Khaki", HtmlColor.FromArgb(240, 230, 140) },
- { "SteelBlue", HtmlColor.FromArgb(70, 130, 180) },
- { "Lavender", HtmlColor.FromArgb(230, 230, 250) },
- { "Tan", HtmlColor.FromArgb(210, 180, 140) },
- { "LavenderBlush", HtmlColor.FromArgb(255, 240, 245) },
- { "Teal", HtmlColor.FromArgb(0, 128, 128) },
- { "LawnGreen", HtmlColor.FromArgb(124, 252, 0) },
- { "Thistle", HtmlColor.FromArgb(216, 191, 216) },
- { "LemonChiffon", HtmlColor.FromArgb(255, 250, 205) },
- { "Tomato", HtmlColor.FromArgb(253, 99, 71) },
- { "LightBlue", HtmlColor.FromArgb(173, 216, 230) },
- { "Turquoise", HtmlColor.FromArgb(64, 224, 208) },
- { "LightCoral", HtmlColor.FromArgb(240, 128, 128) },
- { "Violet", HtmlColor.FromArgb(238, 130, 238) },
- { "LightCyan", HtmlColor.FromArgb(224, 255, 255) },
- { "Wheat", HtmlColor.FromArgb(245, 222, 179) },
- { "LightGoldenrodYellow", HtmlColor.FromArgb(250, 250, 210) },
- { "LightGreen", HtmlColor.FromArgb(144, 238, 144) },
- { "WhiteSmoke", HtmlColor.FromArgb(245, 245, 245) },
- { "LightGray", HtmlColor.FromArgb(211, 211, 211) },
- { "Yellow", HtmlColor.FromArgb(255, 255, 0) },
- { "LightPink", HtmlColor.FromArgb(255, 182, 193) },
- { "YellowGreen", HtmlColor.FromArgb(154, 205, 50) },
- { "Transparent", HtmlColor.FromArgb(0, 0, 0, 0) }
- };
-
- return colors;
- }
-}
\ No newline at end of file
diff --git a/test/HtmlToOpenXml.Tests/AbbrTests.cs b/test/HtmlToOpenXml.Tests/AbbrTests.cs
index 2f65ad1f..7a6cb88d 100644
--- a/test/HtmlToOpenXml.Tests/AbbrTests.cs
+++ b/test/HtmlToOpenXml.Tests/AbbrTests.cs
@@ -175,8 +175,9 @@ public void InsideParagraph_ReturnsMultipleRuns()
Assert.That(elements, Has.Count.EqualTo(1));
Assert.Multiple(() => {
Assert.That(elements[0], Is.TypeOf(typeof(Paragraph)));
- Assert.That(elements[0].Elements().Count(), Is.EqualTo(6), "3 textual runs + 3 breaks");
+ Assert.That(elements[0].Elements().Count(), Is.EqualTo(4), "4 runs: Pre abbr, abbr, quote number, post abbr");
Assert.That(elements[0].Elements().Any(r => r.HasChild()), Is.True);
+ Assert.That(elements[0].InnerText, Is.EqualTo("The NASA is an independent agency of the U.S. federal government responsible for the civil space program, aeronautics research, and space research."));
});
}
diff --git a/test/HtmlToOpenXml.Tests/ElementTests.cs b/test/HtmlToOpenXml.Tests/ElementTests.cs
index cb2f55c6..a82685e3 100644
--- a/test/HtmlToOpenXml.Tests/ElementTests.cs
+++ b/test/HtmlToOpenXml.Tests/ElementTests.cs
@@ -28,7 +28,6 @@ public void PhrasingTag_ReturnsRunWithDefaultStyle (string html) where T : Op
[TestCase(@"Superscript", ExpectedResult = "superscript")]
public string? SubSup_ReturnsRunWithVerticalAlignment (string html)
{
- //var val = new VerticalPositionValues(tagName);
var textAlign = ParsePhrasing(html);
Assert.That(textAlign.Val?.HasValue, Is.True);
return textAlign.Val.InnerText;
@@ -154,9 +153,32 @@ public void FigCaption_ReturnsRunWithSimpleField()
Assert.Multiple(() =>
{
- Assert.That(elements[0].ChildElements, Has.Count.EqualTo(3));
Assert.That(elements[0].HasChild(), Is.True);
Assert.That(elements[0].HasChild(), Is.True);
+ Assert.That(elements[0].Elements().Count(), Is.EqualTo(3));
+ Assert.That(elements[0].GetFirstChild()!.KeepNext, Is.Null);
+ Assert.That(elements[0].GetFirstChild()!.Instruction?.Value, Does.Contain("SEQ Figure \\* ARABIC"));
+ });
+ }
+
+ [Test]
+ public void FigCaption_WithHeading_ReturnsParagraphWithSimpleField()
+ {
+ var elements = converter.Parse(@"
+