diff --git a/.vscode/settings.json b/.vscode/settings.json index e8962d50..9b58ce6b 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,9 +1,9 @@ { - "omnisharp.organizeImportsOnFormat": true, "dotnet.completion.showCompletionItemsFromUnimportedNamespaces": false, "coverage-gutters.coverageFileNames":[ "coverage.info" ], "coverage-gutters.showGutterCoverage": false, - "coverage-gutters.showLineCoverage": true + "coverage-gutters.showLineCoverage": true, + "dotnet.formatting.organizeImportsOnFormat": true } \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index bba08de9..7b0af9c8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,34 @@ # Changelog +## 3.2.5 + +- Fix a crash with the new whitespace handling introduced in 3.2.3 #191 +- Fix crash when the html contains 2 images with identical source path #193 +- Support margin auto for table alignment #194 +- Fix handling whitespace between runs #195 +- Whitelist more mime-types as specified by the IANA standard #196 +- Support EMF file #196 +- Correct handling of `figcaption` (allow nested phrasings) #197 +- Numbering list now supports type attribute `
    ` #198 +- Always restart nested numbering list #198 +- Fix table borders being removed even when the specified word table style has borders #199 +- Defensive code when download image stream is truncated #201 +- Table inside list is constrained to not exceed page margin #202 +- Table now supports width:auto for auto-fit content #202 + +## 3.2.4 + +- Fix a crash with the new whitespace handling introduced in 3.2.3 #191 +- Table inside list must be aligned with the list item #192 + +## 3.2.3 + +- Improve support of table alignment #187 +- Fix a crash if a span is empty +- Heading with only digits should not be considered as a numbering #189 +- Fix whitespaces inserted between spans #179 and #185 +- Support percentage size (typically width:100%) for img node #188 + ## 3.2.2 - Supports a feature to disable heading numbering #175 diff --git a/examples/Demo/Demo.csproj b/examples/Demo/Demo.csproj index 3e76b9dc..d0b3a6f8 100644 --- a/examples/Demo/Demo.csproj +++ b/examples/Demo/Demo.csproj @@ -21,6 +21,7 @@ + \ No newline at end of file diff --git a/examples/Demo/Resources/LargeImg.html b/examples/Demo/Resources/LargeImg.html new file mode 100644 index 00000000..79b187ce --- /dev/null +++ b/examples/Demo/Resources/LargeImg.html @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/examples/Demo/app.config b/examples/Demo/app.config deleted file mode 100644 index 400b70b3..00000000 --- a/examples/Demo/app.config +++ /dev/null @@ -1,13 +0,0 @@ - - - - \ No newline at end of file diff --git a/examples/Demo/images/The-Song-of-the-World.jpg b/examples/Demo/images/The-Song-of-the-World.jpg new file mode 100644 index 00000000..5e1d8983 Binary files /dev/null and b/examples/Demo/images/The-Song-of-the-World.jpg differ diff --git a/src/Html2OpenXml/Expressions/BlockElementExpression.cs b/src/Html2OpenXml/Expressions/BlockElementExpression.cs index 62bc8ced..81a8a4f2 100644 --- a/src/Html2OpenXml/Expressions/BlockElementExpression.cs +++ 
b/src/Html2OpenXml/Expressions/BlockElementExpression.cs @@ -27,6 +27,7 @@ class BlockElementExpression: PhrasingElementExpression { private readonly OpenXmlLeafElement[]? defaultStyleProperties; protected readonly ParagraphProperties paraProperties = new(); + protected TableProperties? tableProperties; // some style attributes, such as borders or bgcolor, will convert this node to a framed container protected bool renderAsFramed; private HtmlBorder styleBorder; @@ -115,22 +116,44 @@ protected override IEnumerable Interpret ( public override void CascadeStyles(OpenXmlElement element) { base.CascadeStyles(element); - if (!paraProperties.HasChildren || element is not Paragraph paragraph) + if (!paraProperties.HasChildren) return; - paragraph.ParagraphProperties ??= new ParagraphProperties(); - - var knownTags = new HashSet(); - foreach (var prop in paragraph.ParagraphProperties) + if (element is Paragraph paragraph) { - if (!knownTags.Contains(prop.LocalName)) - knownTags.Add(prop.LocalName); - } + paragraph.ParagraphProperties ??= new ParagraphProperties(); - foreach (var prop in paraProperties) + var knownTags = new HashSet(); + foreach (var prop in paragraph.ParagraphProperties) + { + if (!knownTags.Contains(prop.LocalName)) + knownTags.Add(prop.LocalName); + } + + foreach (var prop in paraProperties) + { + if (!knownTags.Contains(prop.LocalName)) + paragraph.ParagraphProperties.AddChild(prop.CloneNode(true)); + } + } + else if (tableProperties != null && element is Table table) { - if (!knownTags.Contains(prop.LocalName)) - paragraph.ParagraphProperties.AddChild(prop.CloneNode(true)); + var props = table.GetFirstChild(); + if (props is null) + return; + + var knownTags = new HashSet(); + foreach (var prop in props) + { + if (!knownTags.Contains(prop.LocalName)) + knownTags.Add(prop.LocalName); + } + + foreach (var prop in tableProperties) + { + if (!knownTags.Contains(prop.LocalName)) + props.AddChild(prop.CloneNode(true)); + } } } @@ -170,9 +193,12 @@ protected 
override void ComposeStyles (ParsingContext context) JustificationValues? align = Converter.ToParagraphAlign(styleAttributes!["text-align"]); if (!align.HasValue) align = Converter.ToParagraphAlign(node.GetAttribute("align")); + if (!align.HasValue) align = Converter.ToParagraphAlign(styleAttributes["justify-content"]); if (align.HasValue) { paraProperties.Justification = new() { Val = align }; + tableProperties ??= new(); + tableProperties.TableJustification = new() { Val = align.Value.ToTableRowAlignment() }; } @@ -194,7 +220,7 @@ protected override void ComposeStyles (ParsingContext context) } var margin = styleAttributes.GetMargin("margin"); - Indentation? indentation = null; + Indentation? indentation = null; if (!margin.IsEmpty) { if (margin.Top.IsFixed || margin.Bottom.IsFixed) @@ -345,7 +371,7 @@ private static Paragraph CreateParagraph(ParsingContext context, IList -/// Process the parsing of a figcaption element, which is used to describe an image. -/// -sealed class FigureCaptionExpression(IHtmlElement node) : PhrasingElementExpression(node) -{ - - /// - public override IEnumerable Interpret (ParsingContext context) - { - ComposeStyles(context); - var childElements = Interpret(context.CreateChild(this), node.ChildNodes); - if (!childElements.Any()) - return []; - - var p = new Paragraph ( - new Run( - new Text("Figure ") { Space = SpaceProcessingModeValues.Preserve } - ), - new SimpleField( - new Run( - new Text(AddFigureCaption(context).ToString(CultureInfo.InvariantCulture))) - ) { Instruction = " SEQ Figure \\* ARABIC " } - ) { - ParagraphProperties = new ParagraphProperties { - ParagraphStyleId = context.DocumentStyle.GetParagraphStyle(context.DocumentStyle.DefaultStyles.CaptionStyle), - KeepNext = new KeepNext() - } - }; - - if (childElements.First() is Run run) // any caption? - { - Text? 
t = run.GetFirstChild(); - if (t != null) - t.Text = " " + t.InnerText; // append a space after the numero of the picture - } - - return [p]; - } - - /// - /// Add a new figure caption to the document. - /// - /// Returns the id of the new figure caption. - private static int AddFigureCaption(ParsingContext context) - { - var figCaptionRef = context.Properties("figCaptionRef"); - if (!figCaptionRef.HasValue) - { - figCaptionRef = 0; - foreach (var p in context.MainPart.Document.Descendants()) - { - if (p.Instruction == " SEQ Figure \\* ARABIC ") - figCaptionRef++; - } - } - figCaptionRef++; - - context.Properties("figCaptionRef", figCaptionRef); - return figCaptionRef.Value; - } -} \ No newline at end of file +/* Copyright (C) Olivier Nizet https://github.com/onizet/html2openxml - All Rights Reserved + * + * This source is subject to the Microsoft Permissive License. + * Please see the License.txt file for more information. + * All other rights reserved. + * + * THIS CODE AND INFORMATION ARE PROVIDED "AS IS" WITHOUT WARRANTY OF ANY + * KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A + * PARTICULAR PURPOSE. + */ +using System.Collections.Generic; +using System.Globalization; +using System.Linq; +using AngleSharp.Html.Dom; +using DocumentFormat.OpenXml; +using DocumentFormat.OpenXml.Wordprocessing; + +namespace HtmlToOpenXml.Expressions; + +/// +/// Process the parsing of a figcaption element, which is used to describe an image. 
+/// +sealed class FigureCaptionExpression(IHtmlElement node) : BlockElementExpression(node) +{ + + /// + public override IEnumerable Interpret (ParsingContext context) + { + ComposeStyles(context); + var childElements = Interpret(context.CreateChild(this), node.ChildNodes); + + var figNumRef = new List() { + new Run( + new Text("Figure ") { Space = SpaceProcessingModeValues.Preserve } + ), + new SimpleField( + new Run( + new Text(AddFigureCaption(context).ToString(CultureInfo.InvariantCulture))) + ) { Instruction = " SEQ Figure \\* ARABIC " } + }; + + + if (!childElements.Any()) + { + return [new Paragraph(figNumRef) { + ParagraphProperties = new ParagraphProperties { + ParagraphStyleId = context.DocumentStyle.GetParagraphStyle(context.DocumentStyle.DefaultStyles.CaptionStyle), + KeepNext = DetermineKeepNext(node), + } + }]; + } + + //Add the figure number references to the start of the first paragraph. + if(childElements.FirstOrDefault() is Paragraph p) + { + var properties = p.GetFirstChild(); + p.InsertAfter(new Run( + new Text(" ") { Space = SpaceProcessingModeValues.Preserve } + ), properties); + p.InsertAfter(figNumRef[1], properties); + p.InsertAfter(figNumRef[0], properties); + } + else + { + // The first child of the figure caption is a table or something. + // Just prepend a new paragraph with the figure number reference. + childElements = [ + new Paragraph(figNumRef), + ..childElements + ]; + } + + foreach (var paragraph in childElements.OfType()) + { + paragraph.ParagraphProperties ??= new ParagraphProperties(); + paragraph.ParagraphProperties.ParagraphStyleId ??= context.DocumentStyle.GetParagraphStyle(context.DocumentStyle.DefaultStyles.CaptionStyle); + //Keep caption paragraphs together. 
+ paragraph.ParagraphProperties.KeepNext = new KeepNext(); + } + + if(childElements.OfType().LastOrDefault() is Paragraph lastPara) + { + lastPara.ParagraphProperties!.KeepNext = DetermineKeepNext(node); + } + + return childElements; + } + + /// + /// Add a new figure caption to the document. + /// + /// Returns the id of the new figure caption. + private static int AddFigureCaption(ParsingContext context) + { + var figCaptionRef = context.Properties("figCaptionRef"); + if (!figCaptionRef.HasValue) + { + figCaptionRef = 0; + foreach (var p in context.MainPart.Document.Descendants()) + { + if (p.Instruction == " SEQ Figure \\* ARABIC ") + figCaptionRef++; + } + } + figCaptionRef++; + + context.Properties("figCaptionRef", figCaptionRef); + return figCaptionRef.Value; + } + + /// + /// Determines whether the KeepNext property should apply to this caption. + /// + /// A new KeepNext instance, or null. + private static KeepNext? DetermineKeepNext(IHtmlElement node) + { + // A caption at the end of a figure will have no next sibling. 
+ if(node.NextElementSibling is null) + { + return null; + } + return new(); + } +} diff --git a/src/Html2OpenXml/Expressions/HtmlDomExpression.cs b/src/Html2OpenXml/Expressions/HtmlDomExpression.cs index 44ec6bc0..3efc82ef 100644 --- a/src/Html2OpenXml/Expressions/HtmlDomExpression.cs +++ b/src/Html2OpenXml/Expressions/HtmlDomExpression.cs @@ -38,9 +38,11 @@ private static Dictionary> InitKnownTa { TagNames.Abbr, el => new AbbreviationExpression((IHtmlElement) el) }, { "acronym", el => new AbbreviationExpression((IHtmlElement) el) }, { TagNames.B, el => new PhrasingElementExpression((IHtmlElement) el, new Bold()) }, + { TagNames.Big, el => new PhrasingElementExpression((IHtmlElement) el, new FontSize() { Val = "36" }) }, { TagNames.BlockQuote, el => new BlockQuoteExpression((IHtmlElement) el) }, { TagNames.Br, _ => new LineBreakExpression() }, { TagNames.Cite, el => new CiteElementExpression((IHtmlElement) el) }, + { TagNames.Code, el => new PhrasingElementExpression((IHtmlElement) el) }, { TagNames.Dd, el => new BlockElementExpression((IHtmlElement) el, new Indentation() { FirstLine = "708" }, new SpacingBetweenLines() { After = "0" }) }, { TagNames.Del, el => new PhrasingElementExpression((IHtmlElement) el, new Strike()) }, { TagNames.Dfn, el => new AbbreviationExpression((IHtmlElement) el) }, @@ -57,10 +59,18 @@ private static Dictionary> InitKnownTa { TagNames.Hr, el => new HorizontalLineExpression((IHtmlElement) el) }, { TagNames.Img, el => new ImageExpression((IHtmlImageElement) el) }, { TagNames.Ins, el => new PhrasingElementExpression((IHtmlElement) el, new Underline() { Val = UnderlineValues.Single }) }, + { TagNames.Kbd, el => new PhrasingElementExpression((IHtmlElement) el) }, + { TagNames.Mark, el => new PhrasingElementExpression((IHtmlElement) el, new Shading { Val = ShadingPatternValues.Clear, Fill = "FFFF00" /* yellow */ }) }, + { TagNames.NoBr, el => new PhrasingElementExpression((IHtmlElement) el) }, { TagNames.Ol, el => new 
ListExpression((IHtmlElement) el) }, { TagNames.Pre, el => new PreElementExpression((IHtmlElement) el) }, { TagNames.Q, el => new QuoteElementExpression((IHtmlElement) el) }, { TagNames.Quote, el => new QuoteElementExpression((IHtmlElement) el) }, + { TagNames.Rb, el => new PhrasingElementExpression((IHtmlElement) el) }, + { TagNames.Rt, el => new PhrasingElementExpression((IHtmlElement) el) }, + { TagNames.Ruby, el => new BlockElementExpression((IHtmlElement) el) }, + { TagNames.Samp, el => new PhrasingElementExpression((IHtmlElement) el) }, + { TagNames.Small, el => new PhrasingElementExpression((IHtmlElement) el, new FontSize() { Val = "20" }) }, { TagNames.Span, el => new PhrasingElementExpression((IHtmlElement) el) }, { TagNames.S, el => new PhrasingElementExpression((IHtmlElement) el, new Strike()) }, { TagNames.Strike, el => new PhrasingElementExpression((IHtmlElement) el, new Strike()) }, @@ -70,8 +80,10 @@ private static Dictionary> InitKnownTa { TagNames.Svg, el => new SvgExpression((AngleSharp.Svg.Dom.ISvgSvgElement) el) }, { TagNames.Table, el => new TableExpression((IHtmlTableElement) el) }, { TagNames.Time, el => new PhrasingElementExpression((IHtmlElement) el) }, + { TagNames.Tt, el => new PhrasingElementExpression((IHtmlElement) el) }, { TagNames.U, el => new PhrasingElementExpression((IHtmlElement) el, new Underline() { Val = UnderlineValues.Single }) }, { TagNames.Ul, el => new ListExpression((IHtmlElement) el) }, + { TagNames.Var, el => new PhrasingElementExpression((IHtmlElement) el) } }; return knownTags; @@ -83,7 +95,6 @@ private static Dictionary> InitKnownTa /// The parsing context. public abstract IEnumerable Interpret (ParsingContext context); - /// /// Create a new interpreter for the given html tag. 
/// diff --git a/src/Html2OpenXml/Expressions/Image/ImageExpression.cs b/src/Html2OpenXml/Expressions/Image/ImageExpression.cs index a4b61c30..c1c4df32 100644 --- a/src/Html2OpenXml/Expressions/Image/ImageExpression.cs +++ b/src/Html2OpenXml/Expressions/Image/ImageExpression.cs @@ -56,9 +56,14 @@ class ImageExpression(IHtmlImageElement node) : ImageExpressionBase(node) } if (imgNode.DisplayHeight > 0) { - // Image perspective skewed. Bug fixed by ddeforge on github.com/onizet/html2openxml/discussions/350500 preferredSize.Height = imgNode.DisplayHeight; } + if (preferredSize.IsEmpty) + { + var styles = imgNode.GetStyles(); + preferredSize.Width = GetDimension(styles, "width", "max-width", 642); + preferredSize.Height = GetDimension(styles, "height", "max-height", 428); + } HtmlImageInfo? iinfo = context.ImageLoader.Download(src, CancellationToken.None) .ConfigureAwait(false).GetAwaiter().GetResult(); @@ -85,6 +90,7 @@ class ImageExpression(IHtmlImageElement node) : ImageExpressionBase(node) else if (preferredSize.Width <= 0 || preferredSize.Height <= 0) { Size actualSize = iinfo.Size; + // Image perspective skewed. Bug fixed by ddeforge on github.com/onizet/html2openxml/discussions/350500 preferredSize = ImageHeader.KeepAspectRatio(actualSize, preferredSize); } @@ -130,4 +136,22 @@ class ImageExpression(IHtmlImageElement node) : ImageExpressionBase(node) return img; } + + private static int GetDimension(HtmlAttributeCollection styles, string primaryStyle, string fallbackStyle, int percentageBase) + { + var unit = styles.GetUnit(primaryStyle); + if (!unit.IsValid) + { + unit = styles.GetUnit(fallbackStyle); + } + + if (unit.IsValid) + { + return unit.Type == UnitMetric.Percent? 
+ (int)(unit.Value * percentageBase / 100) : + unit.ValueInPx; + } + + return 0; + } } \ No newline at end of file diff --git a/src/Html2OpenXml/Expressions/Numbering/HeadingElementExpression.cs b/src/Html2OpenXml/Expressions/Numbering/HeadingElementExpression.cs index 9a9f6ffd..1039fae8 100644 --- a/src/Html2OpenXml/Expressions/Numbering/HeadingElementExpression.cs +++ b/src/Html2OpenXml/Expressions/Numbering/HeadingElementExpression.cs @@ -9,6 +9,7 @@ * IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A * PARTICULAR PURPOSE. */ +using System; using System.Collections.Generic; using System.Linq; using System.Text.RegularExpressions; @@ -23,7 +24,8 @@ namespace HtmlToOpenXml.Expressions; /// sealed class HeadingElementExpression(IHtmlElement node) : NumberingExpressionBase(node) { - private static readonly Regex numberingRegex = new(@"^\s*(\d+\.?)*\s*"); + private static readonly Regex numberingRegex = new(@"^\s*(?[0-9\.]+\s*)[^0-9]", + RegexOptions.Compiled, TimeSpan.FromMilliseconds(100)); /// public override IEnumerable Interpret (ParsingContext context) @@ -36,7 +38,7 @@ public override IEnumerable Interpret (ParsingContext context) var paragraph = childElements.FirstOrDefault() as Paragraph; - paragraph ??= new Paragraph(childElements); + paragraph ??= new(childElements); paragraph.ParagraphProperties ??= new(); paragraph.ParagraphProperties.ParagraphStyleId = context.DocumentStyle.GetParagraphStyle(context.DocumentStyle.DefaultStyles.HeadingStyle + level); @@ -65,16 +67,30 @@ public override IEnumerable Interpret (ParsingContext context) private static bool IsNumbering(OpenXmlElement runElement) { + if (runElement.InnerText is null) + return false; + // Check if the line starts with a number format (1., 1.1., 1.1.1.) // If it does, make sure we make the heading a numbered item - Match regexMatch = numberingRegex.Match(runElement.InnerText ?? 
string.Empty); + var headingText = runElement.InnerText; + Match regexMatch; + try + { + regexMatch = numberingRegex.Match(headingText); + } + catch (RegexMatchTimeoutException) + { + return false; + } + // Make sure we only grab the heading if it starts with a number - if (regexMatch.Groups.Count > 1 && regexMatch.Groups[1].Captures.Count > 0) + if (regexMatch.Success && headingText.Length > regexMatch.Groups["number"].Length) { - // Strip numbers from text + // Strip numbers from text + headingText = headingText.Substring(regexMatch.Groups["number"].Length); runElement.InnerXml = runElement.InnerXml - .Replace(runElement.InnerText!, runElement.InnerText!.Substring(regexMatch.Length)); + .Replace(runElement.InnerText!, headingText); return true; } diff --git a/src/Html2OpenXml/Expressions/Numbering/ListExpression.cs b/src/Html2OpenXml/Expressions/Numbering/ListExpression.cs index 3fa20af3..ed7af86d 100644 --- a/src/Html2OpenXml/Expressions/Numbering/ListExpression.cs +++ b/src/Html2OpenXml/Expressions/Numbering/ListExpression.cs @@ -1,4 +1,4 @@ -/* Copyright (C) Olivier Nizet https://github.com/onizet/html2openxml - All Rights Reserved +/* Copyright (C) Olivier Nizet https://github.com/onizet/html2openxml - All Rights Reserved * * This source is subject to the Microsoft Permissive License. * Please see the License.txt file for more information. 
@@ -91,6 +91,24 @@ public override IEnumerable Interpret(ParsingContext context) var childElements = expression.Interpret(context); if (!childElements.Any()) continue; + // table must be aligned to the list item + var tables = childElements.OfType(); + var tableIndentation = level * Indentation * 2; + foreach (var table in tables) + { + var tableProperties = table.GetFirstChild(); + if (tableProperties == null) + table.PrependChild(tableProperties = new()); + + tableProperties.TableIndentation ??= new() { Width = tableIndentation }; + // ensure to restrain the table width to the list item + if (tableProperties.TableWidth?.Type?.Value == TableWidthUnitValues.Pct + && tableProperties.TableWidth?.Width?.Value == "5000") + { + tableProperties.TableWidth.Width = (5000 - tableIndentation).ToString(); + } + } + // ensure to filter out any non-paragraph like any nested table var paragraphs = childElements.OfType(); var listItemStyleId = GetStyleIdForListItem(context.DocumentStyle, liNode); @@ -143,7 +161,15 @@ private ListContext ConcretiseInstance(ParsingContext context, int abstractNumId int overrideLevelIndex = 0; var isOrderedTag = node.NodeName.Equals("ol", StringComparison.OrdinalIgnoreCase); var dir = node.GetTextDirection(); - if (!instanceId.HasValue || context.Converter.ContinueNumbering == false) + + // be sure to restart to 1 any nested ordered list + if (currentLevel > 0 && isOrderedTag) + { + instanceId = IncrementInstanceId(context, abstractNumId, isReusable: false); + overrideLevelIndex = currentLevel; + listContext = new ListContext(listStyle, abstractNumId, instanceId.Value, currentLevel + 1, dir); + } + else if (!instanceId.HasValue || context.Converter.ContinueNumbering == false) { // create a new instance of that list template instanceId = IncrementInstanceId(context, abstractNumId, isReusable: context.Converter.ContinueNumbering); @@ -158,13 +184,6 @@ private ListContext ConcretiseInstance(ParsingContext context, int abstractNumId instanceId = 
IncrementInstanceId(context, abstractNumId, isReusable: false); listContext = new ListContext(listStyle, abstractNumId, instanceId.Value, 1, dir); } - // be sure to restart to 1 any nested ordered list - else if (currentLevel > 0 && isOrderedTag) - { - instanceId = IncrementInstanceId(context, abstractNumId, isReusable: false); - overrideLevelIndex = currentLevel; - listContext = new ListContext(listStyle, abstractNumId, instanceId.Value, currentLevel + 1, dir); - } else { return new ListContext(listStyle, abstractNumId, instanceId.Value, currentLevel + 1, dir); @@ -197,20 +216,39 @@ private ListContext ConcretiseInstance(ParsingContext context, int abstractNumId private static string GetListName(IElement listNode, string? parentName = null) { var styleAttributes = listNode.GetStyles(); + bool orderedList = listNode.NodeName.Equals("ol", StringComparison.OrdinalIgnoreCase); string? type = styleAttributes["list-style-type"]; + if(orderedList && string.IsNullOrEmpty(type)) + { + type = ListTypeToListStyleType(listNode.GetAttribute("type")); + } + if (string.IsNullOrEmpty(type) || !supportedListTypes.Contains(type!)) { if (parentName != null && IsCascadingStyle(parentName)) return parentName!; - bool orderedList = listNode.NodeName.Equals("ol", StringComparison.OrdinalIgnoreCase); type = orderedList? "decimal" : "disc"; } return type!; } + /// + /// Map ordered list style attribute values to css list-style-type. + /// Valid types are "1|a|A|i|I": https://w3schools.com/tags/att_ol_type.asp + /// + private static string? ListTypeToListStyleType(string? type) => type switch + { + "1" => "decimal", + "a" => "lower-alpha", + "A" => "upper-alpha", + "i" => "lower-roman", + "I" => "upper-roman", + _ => null + }; + /// /// Resolve the of a list element node, /// based on its css class if provided and if matching. 
@@ -238,4 +276,4 @@ private static bool IsCascadingStyle(string styleName) { return styleName == "decimal-tiered"; } -} \ No newline at end of file +} diff --git a/src/Html2OpenXml/Expressions/PhrasingElementExpression.cs b/src/Html2OpenXml/Expressions/PhrasingElementExpression.cs index c7619824..ec489ef4 100644 --- a/src/Html2OpenXml/Expressions/PhrasingElementExpression.cs +++ b/src/Html2OpenXml/Expressions/PhrasingElementExpression.cs @@ -1,4 +1,4 @@ -/* Copyright (C) Olivier Nizet https://github.com/onizet/html2openxml - All Rights Reserved +/* Copyright (C) Olivier Nizet https://github.com/onizet/html2openxml - All Rights Reserved * * This source is subject to the Microsoft Permissive License. * Please see the License.txt file for more information. @@ -12,10 +12,8 @@ using System; using System.Collections.Generic; using System.Globalization; -using System.Linq; using AngleSharp.Dom; using AngleSharp.Html.Dom; -using AngleSharp.Text; using DocumentFormat.OpenXml; using DocumentFormat.OpenXml.Wordprocessing; @@ -56,7 +54,8 @@ protected virtual IEnumerable Interpret ( runs.Add(element); } } - return CombineRuns(runs); + + return runs; } public override void CascadeStyles(OpenXmlElement element) @@ -189,40 +188,4 @@ protected virtual void ComposeStyles (ParsingContext context) if (font.Size.IsFixed) runProperties.FontSize = new FontSize() { Val = Math.Round(font.Size.ValueInPoint * 2).ToString(CultureInfo.InvariantCulture) }; } - - /// - /// Mimics the behaviour of Html rendering when 2 consecutives runs are separated by a space. - /// - protected static IEnumerable CombineRuns(IEnumerable runs) - { - if (runs.Count() == 1) - { - yield return runs.First(); - yield break; - } - - bool endsWithSpace = true; - foreach (var run in runs) - { - var textElement = run.GetFirstChild(); - // run can be also a hyperlink - textElement ??= run.GetFirstChild()?.GetFirstChild(); - - if (textElement != null) // could be null when
    - { - var text = textElement.Text; - // we know that the text cannot be empty because we skip them in TextExpression - if (!endsWithSpace && !text[0].IsSpaceCharacter()) - { - yield return new Run(new Text(" ") { Space = SpaceProcessingModeValues.Preserve }); - } - endsWithSpace = text[text.Length - 1].IsSpaceCharacter(); - } - else if (run.LastChild is Break) - { - endsWithSpace = true; - } - yield return run; - } - } -} \ No newline at end of file +} diff --git a/src/Html2OpenXml/Expressions/Table/TableCellExpression.cs b/src/Html2OpenXml/Expressions/Table/TableCellExpression.cs index 4e816f89..f9d60817 100644 --- a/src/Html2OpenXml/Expressions/Table/TableCellExpression.cs +++ b/src/Html2OpenXml/Expressions/Table/TableCellExpression.cs @@ -31,9 +31,6 @@ public override IEnumerable Interpret (ParsingContext context) { var childElements = base.Interpret (context); - if (!childElements.Any()) // Word requires that the cell is not empty - childElements = [new Paragraph()]; - var cell = new TableCell (cellProperties); if (cellNode.ColumnSpan > 1) @@ -46,6 +43,13 @@ public override IEnumerable Interpret (ParsingContext context) cellProperties.VerticalMerge = new() { Val = MergedCellValues.Restart }; } + // Word requires at least one paragraph in a cell + // OpenXmlValidator does not catch this error + if (!childElements.Any(c => c is Paragraph)) + { + childElements = childElements.Append(new Paragraph()); + } + cell.Append(childElements); return [cell]; } diff --git a/src/Html2OpenXml/Expressions/Table/TableExpression.cs b/src/Html2OpenXml/Expressions/Table/TableExpression.cs index 84c1d029..3faea0ff 100644 --- a/src/Html2OpenXml/Expressions/Table/TableExpression.cs +++ b/src/Html2OpenXml/Expressions/Table/TableExpression.cs @@ -156,7 +156,7 @@ private static int GuessColumnsCount(IHtmlTableElement tableNode) } } - if (rows.Any()) + if (rows.Length > 0) columnCount = Math.Max(rows.Max(), columnCount); } @@ -186,6 +186,9 @@ protected override void ComposeStyles 
(ParsingContext context) tableProperties.TableWidth = new() { Type = TableWidthUnitValues.Dxa, Width = width.ValueInDxa.ToString(CultureInfo.InvariantCulture) }; break; + case UnitMetric.Auto: + tableProperties.TableWidth = new() { Width = "0", Type = TableWidthUnitValues.Auto }; + break; } foreach (string className in tableNode.ClassList) @@ -198,10 +201,6 @@ protected override void ComposeStyles (ParsingContext context) } } - var align = Converter.ToParagraphAlign(tableNode.GetAttribute("align")); - if (align.HasValue) - tableProperties.TableJustification = new() { Val = align.Value.ToTableRowAlignment() }; - var dir = tableNode.GetTextDirection(); if (dir.HasValue) tableProperties.BiDiVisual = new() { @@ -243,7 +242,8 @@ protected override void ComposeStyles (ParsingContext context) tableProperties.TableBorders = tableBorders; } // is the border=0? If so, we remove the border regardless the style in use - else if (tableNode.Border == 0) + // but only remove border if the html style border was set, otherwise leave the border style as-is. + else if (!styleBorder.IsEmpty && tableNode.Border == 0) { tableProperties.TableBorders = new TableBorders() { TopBorder = new TopBorder { Val = BorderValues.None }, @@ -280,5 +280,22 @@ protected override void ComposeStyles (ParsingContext context) }; } } + + var align = Converter.ToParagraphAlign(tableNode.GetAttribute("align")) + ?? 
Converter.ToParagraphAlign(styleAttributes["justify-self"]); + if (!align.HasValue) + { + var margin = styleAttributes.GetMargin("margin"); + if (margin.Left.Type == UnitMetric.Auto) + { + if (margin.Right.Type == UnitMetric.Auto) + align = JustificationValues.Center; + else + align = JustificationValues.Right; + } + } + + if (align.HasValue) + tableProperties.TableJustification = new() { Val = align.Value.ToTableRowAlignment() }; } } diff --git a/src/Html2OpenXml/Expressions/TextExpression.cs b/src/Html2OpenXml/Expressions/TextExpression.cs index 427e364b..dd2705df 100644 --- a/src/Html2OpenXml/Expressions/TextExpression.cs +++ b/src/Html2OpenXml/Expressions/TextExpression.cs @@ -9,6 +9,10 @@ * IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A * PARTICULAR PURPOSE. */ +using System; +#if NET5_0_OR_GREATER +using System.Collections.Frozen; +#endif using System.Collections.Generic; using AngleSharp.Dom; using AngleSharp.Html.Dom; @@ -23,55 +27,161 @@ namespace HtmlToOpenXml.Expressions; ///
    sealed class TextExpression(INode node) : HtmlDomExpression { + static readonly ISet AllPhrasings = InitPhrasingSets(); private readonly INode node = node; + private static ISet InitPhrasingSets() + { + var sets = new HashSet(StringComparer.InvariantCultureIgnoreCase) { + TagNames.A, TagNames.Abbr, TagNames.B, TagNames.Big, TagNames.Cite, TagNames.Code, + TagNames.Del, TagNames.Dfn, TagNames.Em, TagNames.Font, TagNames.Hr, TagNames.I, + TagNames.Img, TagNames.Ins, TagNames.Kbd, TagNames.Mark, TagNames.NoBr, TagNames.Q, + TagNames.Rp, TagNames.Rt, TagNames.S, TagNames.Samp, TagNames.Small, TagNames.Span, + TagNames.Strike, TagNames.Strong, TagNames.Sub, TagNames.Sup, TagNames.Time, + TagNames.Tt, TagNames.U, TagNames.Var + }; + +#if NET5_0_OR_GREATER + return sets.ToFrozenSet(StringComparer.InvariantCultureIgnoreCase); +#else + return sets; +#endif + } + /// public override IEnumerable Interpret (ParsingContext context) { string text = node.TextContent.Normalize(); - if (text.Trim().Length == 0) return []; + + if (text.Length == 0) + return []; if (!context.PreserveLinebreaks) - text = text.CollapseLineBreaks(); - if (context.CollapseWhitespaces && text[0].IsWhiteSpaceCharacter() && - node.PreviousSibling is IHtmlImageElement) { - text = " " + text.CollapseAndStrip(); + text = text.CollapseLineBreaks(); + if (text.Length == 0) + return []; } - else if (context.CollapseWhitespaces) + + // https://developer.mozilla.org/en-US/docs/Web/API/Document_Object_Model/Whitespace + // If there is a space between two phrasing elements, the user agent should collapse it to a single space character. 
+ if (context.CollapseWhitespaces) + { + bool startsWithSpace = text[0].IsWhiteSpaceCharacter(), + endsWithSpace = text[text.Length - 1].IsWhiteSpaceCharacter(), + preserveBorderSpaces = AllPhrasings.Contains(node.Parent!.NodeName), + prevIsPhrasing = node.PreviousSibling is not null && + (AllPhrasings.Contains(node.PreviousSibling.NodeName) || node.PreviousSibling!.NodeType == NodeType.Text), + nextIsPhrasing = node.NextSibling is not null && + (AllPhrasings.Contains(node.NextSibling.NodeName) || node.NextSibling!.NodeType == NodeType.Text); + text = text.CollapseAndStrip(); + // keep a single collapsed space if it stands between 2 non-empty phrasings where + // the previous one doesn't end with, and the next one doesn't start with, a whitespace + if (text.Length == 0 && prevIsPhrasing && nextIsPhrasing + && (endsWithSpace || startsWithSpace) + && !(node.PreviousSibling!.TextContent.Length == 0 + || node.NextSibling!.TextContent.Length == 0 + || node.PreviousSibling!.TextContent[node.PreviousSibling!.TextContent.Length - 1].IsWhiteSpaceCharacter() + || node.NextSibling!.TextContent[0].IsWhiteSpaceCharacter() + )) + { + return [new Run(new Text(" ") { Space = SpaceProcessingModeValues.Preserve })]; + } + // we strip out all whitespaces and we stand inside a div. 
Just skip this text content + if (text.Length == 0 && !preserveBorderSpaces) + { + return []; + } + + // if previous element is an image, append a space separator + // if this is a non-empty phrasing element, append a space separator + if (startsWithSpace && node.PreviousSibling is IHtmlImageElement) + { + text = " " + text; + } + else if (startsWithSpace && prevIsPhrasing + && node.PreviousSibling!.TextContent.Length > 0 + && !node.PreviousSibling!.TextContent[node.PreviousSibling.TextContent.Length - 1].IsWhiteSpaceCharacter()) + { + text = " " + text; + } + + if (endsWithSpace && ( + // next run is not starting with a linebreak + (nextIsPhrasing && node.NextSibling!.TextContent.Length > 0 && + !node.NextSibling!.TextContent[0].IsLineBreak()) || + // if there is no more text element or is empty, eat the trailing space + (preserveBorderSpaces && (node.NextSibling is not null + || node.Parent.NextSibling is not null)))) + { + text += " "; + } + } + + + if (text.Length == 0) + return []; + if (!context.PreserveLinebreaks) - return [new Run(new Text(text))]; + return [new Run(new Text(text) { Space = SpaceProcessingModeValues.Preserve })]; + + Run run = EscapeNewlines(text); + return [run]; + } + /// + /// Convert new lines to . 
+ /// + private static Run EscapeNewlines(string text) + { var run = new Run(); - char[] chars = text.ToCharArray(); - int shift = 0, c = 0; bool wasCR = false; // avoid adding 2 breaks for \r\n - for ( ; c < chars.Length ; c++) + + int startIndex = 0; + for (int i = 0; i < text.Length; i++) { - if (!chars[c].IsLineBreak()) - { - wasCR = false; + if (!IsLineBreak(text[i], ref wasCR)) continue; - } - if (wasCR) continue; - wasCR = chars[c] == Symbols.CarriageReturn; - - if (c > 1) + // Add the text before the newline character + if (i > startIndex) { - run.Append(new Text(new string(chars, shift, c - shift)) + run.Append(new Text(text.Substring(startIndex, i - startIndex)) { Space = SpaceProcessingModeValues.Preserve }); run.Append(new Break()); } - shift = c + 1; + + startIndex = i + 1; } - if (c > shift) - run.Append(new Text(new string(chars, shift, c - shift)) + // Add any remaining text after the last newline character + if (startIndex < text.Length) + { + run.Append(new Text(text.Substring(startIndex)) { Space = SpaceProcessingModeValues.Preserve }); + } - return [run]; + return run; + } + + private static bool IsLineBreak(char ch, ref bool wasCR) + { + if (ch == Symbols.CarriageReturn) + { + wasCR = true; + return true; + } + + if (ch == Symbols.LineFeed && wasCR) + { + // Skip LF character after CR to avoid adding an extra break for CR-LF sequence + wasCR = false; + return false; + } + + wasCR = false; + return ch == Symbols.LineFeed; } } diff --git a/src/Html2OpenXml/HtmlConverter.cs b/src/Html2OpenXml/HtmlConverter.cs index f834ecf2..5fb7f06a 100755 --- a/src/Html2OpenXml/HtmlConverter.cs +++ b/src/Html2OpenXml/HtmlConverter.cs @@ -15,6 +15,7 @@ using System.Threading; using System.Threading.Tasks; using AngleSharp; +using AngleSharp.Html.Dom; using DocumentFormat.OpenXml; using DocumentFormat.OpenXml.Packaging; using DocumentFormat.OpenXml.Wordprocessing; @@ -125,7 +126,8 @@ public async Task ParseHeader(string html, HeaderFooterValues? 
headerType = null var paragraphs = await ParseCoreAsync(html, headerPart, headerImageLoader, new ParallelOptions() { CancellationToken = cancellationToken }, - htmlStyles.GetParagraphStyle(htmlStyles.DefaultStyles.HeaderStyle)); + htmlStyles.GetParagraphStyle(htmlStyles.DefaultStyles.HeaderStyle)) + .ConfigureAwait(false); headerPart.Header.Append(paragraphs); } @@ -149,7 +151,8 @@ public async Task ParseFooter(string html, HeaderFooterValues? footerType = null var paragraphs = await ParseCoreAsync(html, footerPart, footerImageLoader, new ParallelOptions() { CancellationToken = cancellationToken }, - htmlStyles.GetParagraphStyle(htmlStyles.DefaultStyles.FooterStyle)); + htmlStyles.GetParagraphStyle(htmlStyles.DefaultStyles.FooterStyle)) + .ConfigureAwait(false); footerPart.Footer.Append(paragraphs); } @@ -165,7 +168,8 @@ public async Task ParseBody(string html, CancellationToken cancellationToken = d bodyImageLoader ??= new ImagePrefetcher(mainPart, webRequester); var paragraphs = await ParseCoreAsync(html, mainPart, bodyImageLoader, new ParallelOptions() { CancellationToken = cancellationToken }, - htmlStyles.GetParagraphStyle(htmlStyles.DefaultStyles.Paragraph)); + htmlStyles.GetParagraphStyle(htmlStyles.DefaultStyles.Paragraph)) + .ConfigureAwait(false); if (!paragraphs.Any()) return; @@ -274,13 +278,14 @@ private async Task> ParseCoreAsync(string h /// /// Walk through all the img tags and preload all the remote images. /// - private async Task PreloadImages(AngleSharp.Dom.IDocument htmlDocument, + private static async Task PreloadImages(AngleSharp.Dom.IDocument htmlDocument, IImageLoader imageLoader, ParallelOptions parallelOptions) { var imageUris = htmlDocument.QuerySelectorAll("img[src]") - .Cast() + .Cast() .Where(e => AngleSharpExtensions.TryParseUrl(e.GetAttribute("src"), UriKind.RelativeOrAbsolute, out var _)) - .Select(e => e.GetAttribute("src")!); + .Select(e => e.GetAttribute("src")!) 
+ .Distinct(); if (!imageUris.Any()) return; diff --git a/src/Html2OpenXml/HtmlToOpenXml.csproj b/src/Html2OpenXml/HtmlToOpenXml.csproj index bfffc341..917d8d34 100644 --- a/src/Html2OpenXml/HtmlToOpenXml.csproj +++ b/src/Html2OpenXml/HtmlToOpenXml.csproj @@ -9,13 +9,13 @@ HtmlToOpenXml HtmlToOpenXml HtmlToOpenXml.dll - 3.2.2 + 3.2.5 icon.png Copyright 2009-$([System.DateTime]::Now.Year) Olivier Nizet - (Please write the package release notes in CHANGELOG.md) + See changelog https://github.com/onizet/html2openxml/blob/master/CHANGELOG.md README.md office openxml netcore html - 3.2.2 + 3.2.5 MIT https://github.com/onizet/html2openxml https://github.com/onizet/html2openxml @@ -44,7 +44,7 @@ - + @@ -64,15 +64,5 @@ true - - - - - - - - @(ReleaseNoteLines, '%0a') - - \ No newline at end of file diff --git a/src/Html2OpenXml/IO/DataUri.cs b/src/Html2OpenXml/IO/DataUri.cs index 918783fa..394ffaf4 100755 --- a/src/Html2OpenXml/IO/DataUri.cs +++ b/src/Html2OpenXml/IO/DataUri.cs @@ -22,7 +22,7 @@ namespace HtmlToOpenXml.IO; public sealed class DataUri { private readonly static Regex dataUriRegex = new Regex( - @"data\:(?\w+/\w+)?(?:;charset=(?[a-zA-Z_0-9-]+))?(?;base64)?,(?.*)", + @"data\:(?\w+/[\w\-\+\.]+)?(?:;charset=(?[a-zA-Z_0-9-]+))?(?;base64)?,(?.*)", RegexOptions.IgnoreCase | RegexOptions.Singleline); private DataUri(string mime, byte[] data) diff --git a/src/Html2OpenXml/IO/DefaultWebRequest.cs b/src/Html2OpenXml/IO/DefaultWebRequest.cs index 9414a02b..9125c7db 100644 --- a/src/Html2OpenXml/IO/DefaultWebRequest.cs +++ b/src/Html2OpenXml/IO/DefaultWebRequest.cs @@ -126,7 +126,13 @@ public DefaultWebRequest(HttpClient httpClient, ILogger? 
logger = null) resource.StatusCode = response.StatusCode; if (response.IsSuccessStatusCode) + { resource.Content = await response.Content.ReadAsStreamAsync().ConfigureAwait(false); + if (response.Content.Headers.TryGetValues("Content-Type", out var mime)) + { + resource.Headers.Add("Content-Type", string.Join(", ", mime)); + } + } foreach (var header in response.Headers) resource.Headers.Add(header.Key, string.Join(", ", header.Value)); diff --git a/src/Html2OpenXml/IO/ImageHeader.cs b/src/Html2OpenXml/IO/ImageHeader.cs index 858a923d..94a1a518 100755 --- a/src/Html2OpenXml/IO/ImageHeader.cs +++ b/src/Html2OpenXml/IO/ImageHeader.cs @@ -58,12 +58,10 @@ public enum FileType { Unrecognized, Bitmap, Gif, Png, Jpeg, Emf, Xml } /// Returns true if the detection was successful. public static bool TryDetectFileType(Stream stream, out FileType type) { - using (SequentialBinaryReader reader = new SequentialBinaryReader(stream, leaveOpen: true)) - { - type = DetectFileType(reader); - stream.Seek(0L, SeekOrigin.Begin); - return type != FileType.Unrecognized; - } + using var reader = new SequentialBinaryReader(stream, leaveOpen: true); + type = DetectFileType(reader); + stream.Seek(0L, SeekOrigin.Begin); + return type != FileType.Unrecognized; } /// @@ -74,21 +72,19 @@ public static bool TryDetectFileType(Stream stream, out FileType type) /// The image was of an unrecognised format. 
public static Size GetDimensions(Stream stream) { - using (SequentialBinaryReader reader = new SequentialBinaryReader(stream, leaveOpen: true)) + using var reader = new SequentialBinaryReader(stream, leaveOpen: true); + FileType type = DetectFileType(reader); + stream.Seek(0L, SeekOrigin.Begin); + return type switch { - FileType type = DetectFileType (reader); - stream.Seek(0L, SeekOrigin.Begin); - switch (type) - { - case FileType.Bitmap: return DecodeBitmap(reader); - case FileType.Gif: return DecodeGif(reader); - case FileType.Jpeg: return DecodeJfif(reader); - case FileType.Png: return DecodePng(reader); - case FileType.Emf: return DecodeEmf(reader); - case FileType.Xml: return DecodeXml(stream); - default: return Size.Empty; - } - } + FileType.Bitmap => DecodeBitmap(reader), + FileType.Gif => DecodeGif(reader), + FileType.Jpeg => DecodeJfif(reader), + FileType.Png => DecodePng(reader), + FileType.Emf => DecodeEmf(reader), + FileType.Xml => DecodeXml(stream), + _ => Size.Empty, + }; } /// @@ -123,36 +119,25 @@ public static Size KeepAspectRatio(Size actualSize, Size preferredSize) private static FileType DetectFileType (SequentialBinaryReader reader) { byte[] magicBytes = new byte[MaxMagicBytesLength]; - for (int i = 0; i < MaxMagicBytesLength; i += 1) - { - magicBytes[i] = reader.ReadByte(); - foreach (var kvPair in imageFormatDecoders) - { - if (StartsWith(magicBytes, kvPair.Key)) - { - return kvPair.Value; - } - } - } + var availableBytes = reader.BaseStream.Length - reader.BaseStream.Position; + // reasonably, we can assume that if we are at the end of the stream and we read the header, + // the image content must be invalid or truncated. + if (availableBytes < MaxMagicBytesLength) + return FileType.Unrecognized; - return FileType.Unrecognized; - } + reader.Read(magicBytes, 0, MaxMagicBytesLength); - /// - /// Determines whether the beginning of this byte array instance matches the specified byte array. 
- /// - /// Returns true if the first array starts with the bytes of the second array. - private static bool StartsWith(byte[] thisBytes, byte[] thatBytes) - { - for (int i = 0; i < thatBytes.Length; i += 1) + var headerSpan = magicBytes.AsSpan(); + foreach (var kvPair in imageFormatDecoders) { - if (thisBytes[i] != thatBytes[i]) + // Determines whether the beginning of this array matches s known header. + if (headerSpan.StartsWith(kvPair.Key)) { - return false; + return kvPair.Value; } } - return true; + return FileType.Unrecognized; } private static Size DecodeBitmap(SequentialBinaryReader reader) @@ -220,7 +205,7 @@ private static Size DecodeJfif(SequentialBinaryReader reader) return Size.Empty; // next 2-bytes are : [high-byte] [low-byte] - var segmentLength = (int)reader.ReadUInt16(); + int segmentLength = reader.ReadUInt16(); // segment length includes size bytes, so subtract two segmentLength -= 2; @@ -228,8 +213,8 @@ private static Size DecodeJfif(SequentialBinaryReader reader) if (segmentType == 0xC0 || segmentType == 0xC2) { reader.ReadByte(); // bits/sample, usually 8 - int height = (int) reader.ReadUInt16(); - int width = (int) reader.ReadUInt16(); + int height = reader.ReadUInt16(); + int width = reader.ReadUInt16(); return new Size(width, height); } else diff --git a/src/Html2OpenXml/IO/ImagePrefetcher.cs b/src/Html2OpenXml/IO/ImagePrefetcher.cs index 9ff6dfc3..062ad113 100644 --- a/src/Html2OpenXml/IO/ImagePrefetcher.cs +++ b/src/Html2OpenXml/IO/ImagePrefetcher.cs @@ -1,259 +1,261 @@ -/* Copyright (C) Olivier Nizet https://github.com/onizet/html2openxml - All Rights Reserved - * - * This source is subject to the Microsoft Permissive License. - * Please see the License.txt file for more information. - * All other rights reserved. 
- * - * THIS CODE AND INFORMATION ARE PROVIDED "AS IS" WITHOUT WARRANTY OF ANY - * KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A - * PARTICULAR PURPOSE. - */ -using System; -using System.Collections.Generic; -using System.IO; -using System.Threading; -using System.Threading.Tasks; -using DocumentFormat.OpenXml.Packaging; - -namespace HtmlToOpenXml.IO; - -interface IImageLoader -{ - /// - /// Download the remote or local image located at the specified url. - /// - Task Download(string imageUri, CancellationToken cancellationToken); -} - -/// -/// Download and provison the metadata of a requested image. -/// -sealed class ImagePrefetcher : IImageLoader - where T: OpenXmlPartContainer, ISupportedRelationship -{ - // Map extension to PartTypeInfo - private static readonly Dictionary knownExtensions = new(StringComparer.OrdinalIgnoreCase) { - { ".gif", ImagePartType.Gif }, - { ".bmp", ImagePartType.Bmp }, - { ".emf", ImagePartType.Emf }, - { ".ico", ImagePartType.Icon }, - { ".jp2", ImagePartType.Jp2 }, - { ".jpeg", ImagePartType.Jpeg }, - { ".jpg", ImagePartType.Jpeg }, - { ".jpe", ImagePartType.Jpeg }, - { ".pcx", ImagePartType.Pcx }, - { ".png", ImagePartType.Png }, - { ".svg", ImagePartType.Svg }, - { ".tif", ImagePartType.Tif }, - { ".tiff", ImagePartType.Tiff }, - { ".wmf", ImagePartType.Wmf } - }; - private readonly T hostingPart; - private readonly IWebRequest resourceLoader; - private readonly HtmlImageInfoCollection prefetchedImages; - - - /// - /// Constructor. - /// - /// The image will be linked to that hosting part. - /// Images are not shared between header, footer and body. - /// Service to resolve an image. 
- public ImagePrefetcher(T hostingPart, IWebRequest resourceLoader) - { - this.hostingPart = hostingPart; - this.resourceLoader = resourceLoader; - this.prefetchedImages = new HtmlImageInfoCollection(); - } - - //____________________________________________________________________ - // - // Public Functionality - - /// - /// Download the remote or local image located at the specified url. - /// - public async Task Download(string imageUri, CancellationToken cancellationToken) - { - if (prefetchedImages.Contains(imageUri)) - return prefetchedImages[imageUri]; - - HtmlImageInfo? iinfo; - if (DataUri.IsWellFormed(imageUri)) // data inline, encoded in base64 - { - iinfo = ReadDataUri(imageUri); - } - else - { - iinfo = await DownloadRemoteImage(imageUri, cancellationToken); - } - - if (iinfo != null) - prefetchedImages.Add(iinfo); - - return iinfo; - } - - /// - /// Download the image and try to find its format type. - /// - private async Task DownloadRemoteImage(string src, CancellationToken cancellationToken) - { - Uri imageUri = new Uri(src, UriKind.RelativeOrAbsolute); - if (imageUri.IsAbsoluteUri && !resourceLoader.SupportsProtocol(imageUri.Scheme)) - return null; - - Resource? 
response; - - response = await resourceLoader.FetchAsync(imageUri, cancellationToken).ConfigureAwait(false); - if (response?.Content == null) - return null; - - using (response) - { - // For requested url with no filename, we need to read the media mime type if provided - response.Headers.TryGetValue("Content-Type", out var mime); - if (!TryInspectMimeType(mime, out PartTypeInfo type) - && !TryGuessTypeFromUri(imageUri, out type) - && !TryGuessTypeFromStream(response.Content, out type)) - { - return null; - } - - var ipart = hostingPart.AddImagePart(type); - Size originalSize; - using (var outputStream = ipart.GetStream(FileMode.Create)) - { - response.Content.CopyTo(outputStream); - - outputStream.Seek(0L, SeekOrigin.Begin); - originalSize = GetImageSize(outputStream); - } - - return new HtmlImageInfo(src, hostingPart.GetIdOfPart(ipart)) { - TypeInfo = type, - Size = originalSize - }; - } - } - - /// - /// Parse the Data inline image. - /// - private HtmlImageInfo? ReadDataUri(string src) - { - if (DataUri.TryCreate(src, out var dataUri)) - { - Size originalSize; - knownContentType.TryGetValue(dataUri!.Mime, out PartTypeInfo type); - var ipart = hostingPart.AddImagePart(type); - using (var outputStream = ipart.GetStream(FileMode.Create)) - { - outputStream.Write(dataUri.Data, 0, dataUri.Data.Length); - - outputStream.Seek(0L, SeekOrigin.Begin); - originalSize = GetImageSize(outputStream); - } - - return new HtmlImageInfo(src, hostingPart.GetIdOfPart(ipart)) { - TypeInfo = type, - Size = originalSize - }; - } - - return null; - } - - //____________________________________________________________________ - // - // Private Implementation - - // http://stackoverflow.com/questions/58510/using-net-how-can-you-find-the-mime-type-of-a-file-based-on-the-file-signature - private static readonly Dictionary knownContentType = new(StringComparer.OrdinalIgnoreCase) { - { "image/gif", ImagePartType.Gif }, - { "image/pjpeg", ImagePartType.Jpeg }, - { "image/jp2", 
ImagePartType.Jp2 }, - { "image/jpg", ImagePartType.Jpeg }, - { "image/jpeg", ImagePartType.Jpeg }, - { "image/x-png", ImagePartType.Png }, - { "image/png", ImagePartType.Png }, - { "image/tiff", ImagePartType.Tiff }, - { "image/vnd.microsoft.icon", ImagePartType.Icon }, - // these icons mime type are wrong but we should nevertheless take care (http://en.wikipedia.org/wiki/ICO_%28file_format%29#MIME_type) - { "image/x-icon", ImagePartType.Icon }, - { "image/icon", ImagePartType.Icon }, - { "image/ico", ImagePartType.Icon }, - { "text/ico", ImagePartType.Icon }, - { "text/application-ico", ImagePartType.Icon }, - { "image/bmp", ImagePartType.Bmp }, - { "image/svg+xml", ImagePartType.Svg }, - }; - - /// - /// Inspect the response headers of a web request and decode the mime type if provided - /// - /// Returns the extension of the image if provideds. - private static bool TryInspectMimeType(string? contentType, out PartTypeInfo type) - { - // can be null when the protocol used doesn't allow response headers - if (contentType != null && - knownContentType.TryGetValue(contentType, out type)) - return true; - - type = default; - return false; - } - - /// - /// Gets the OpenXml PartTypeInfo associated to an image. - /// - private static bool TryGuessTypeFromUri(Uri uri, out PartTypeInfo type) - { - string extension = Path.GetExtension(uri.IsAbsoluteUri ? uri.Segments[uri.Segments.Length - 1] : uri.OriginalString); - if (knownExtensions.TryGetValue(extension, out type)) return true; - - // extension not recognized, try with checking the query string. Expecting to resolve something like: - // ./image.axd?picture=img1.jpg - extension = Path.GetExtension(uri.IsAbsoluteUri ? uri.AbsoluteUri : uri.ToString()); - if (knownExtensions.TryGetValue(extension, out type)) return true; - - return false; - } - - /// - /// Gets the OpenXml PartTypeInfo associated to an image. 
- /// - private static bool TryGuessTypeFromStream(Stream stream, out PartTypeInfo type) - { - if (ImageHeader.TryDetectFileType(stream, out ImageHeader.FileType guessType)) - { - switch (guessType) - { - case ImageHeader.FileType.Bitmap: type = ImagePartType.Bmp; return true; - case ImageHeader.FileType.Emf: type = ImagePartType.Emf; return true; - case ImageHeader.FileType.Gif: type = ImagePartType.Gif; return true; - case ImageHeader.FileType.Jpeg: type = ImagePartType.Jpeg; return true; - case ImageHeader.FileType.Png: type = ImagePartType.Png; return true; - } - } - type = ImagePartType.Bmp; - return false; - } - - /// - /// Loads an image from a stream and grab its size. - /// - private static Size GetImageSize(Stream imageStream) - { - // Read only the size of the image - try - { - return ImageHeader.GetDimensions(imageStream); - } - catch (ArgumentException) - { - return Size.Empty; - } - } -} +/* Copyright (C) Olivier Nizet https://github.com/onizet/html2openxml - All Rights Reserved + * + * This source is subject to the Microsoft Permissive License. + * Please see the License.txt file for more information. + * All other rights reserved. + * + * THIS CODE AND INFORMATION ARE PROVIDED "AS IS" WITHOUT WARRANTY OF ANY + * KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A + * PARTICULAR PURPOSE. + */ +using System; +using System.Collections.Generic; +using System.IO; +using System.Threading; +using System.Threading.Tasks; +using DocumentFormat.OpenXml.Packaging; + +namespace HtmlToOpenXml.IO; + +interface IImageLoader +{ + /// + /// Download the remote or local image located at the specified url. + /// + Task Download(string imageUri, CancellationToken cancellationToken); +} + +/// +/// Download and provison the metadata of a requested image. 
+/// +sealed class ImagePrefetcher : IImageLoader + where T: OpenXmlPartContainer, ISupportedRelationship +{ + // Map extension to PartTypeInfo + private static readonly Dictionary knownExtensions = new(StringComparer.OrdinalIgnoreCase) { + { ".gif", ImagePartType.Gif }, + { ".bmp", ImagePartType.Bmp }, + { ".emf", ImagePartType.Emf }, + { ".ico", ImagePartType.Icon }, + { ".jp2", ImagePartType.Jp2 }, + { ".jpeg", ImagePartType.Jpeg }, + { ".jpg", ImagePartType.Jpeg }, + { ".jpe", ImagePartType.Jpeg }, + { ".pcx", ImagePartType.Pcx }, + { ".png", ImagePartType.Png }, + { ".svg", ImagePartType.Svg }, + { ".tif", ImagePartType.Tif }, + { ".tiff", ImagePartType.Tiff }, + { ".wmf", ImagePartType.Wmf } + }; + private readonly T hostingPart; + private readonly IWebRequest resourceLoader; + private readonly HtmlImageInfoCollection prefetchedImages; + + + /// + /// Constructor. + /// + /// The image will be linked to that hosting part. + /// Images are not shared between header, footer and body. + /// Service to resolve an image. + public ImagePrefetcher(T hostingPart, IWebRequest resourceLoader) + { + this.hostingPart = hostingPart; + this.resourceLoader = resourceLoader; + this.prefetchedImages = new HtmlImageInfoCollection(); + } + + //____________________________________________________________________ + // + // Public Functionality + + /// + /// Download the remote or local image located at the specified url. + /// + public async Task Download(string imageUri, CancellationToken cancellationToken) + { + if (prefetchedImages.Contains(imageUri)) + return prefetchedImages[imageUri]; + + HtmlImageInfo? iinfo; + if (DataUri.IsWellFormed(imageUri)) // data inline, encoded in base64 + { + iinfo = ReadDataUri(imageUri); + } + else + { + iinfo = await DownloadRemoteImage(imageUri, cancellationToken).ConfigureAwait(false); + } + + if (iinfo != null) + prefetchedImages.Add(iinfo); + + return iinfo; + } + + /// + /// Download the image and try to find its format type. 
+ /// + private async Task DownloadRemoteImage(string src, CancellationToken cancellationToken) + { + Uri imageUri = new Uri(src, UriKind.RelativeOrAbsolute); + if (imageUri.IsAbsoluteUri && !resourceLoader.SupportsProtocol(imageUri.Scheme)) + return null; + + Resource? response; + + response = await resourceLoader.FetchAsync(imageUri, cancellationToken).ConfigureAwait(false); + if (response?.Content == null) + return null; + + using (response) + { + // For requested url with no filename, we need to read the media mime type if provided + response.Headers.TryGetValue("Content-Type", out var mime); + if (!TryInspectMimeType(mime, out PartTypeInfo type) + && !TryGuessTypeFromUri(imageUri, out type) + && !TryGuessTypeFromStream(response.Content, out type)) + { + return null; + } + + var ipart = hostingPart.AddImagePart(type); + Size originalSize; + using (var outputStream = ipart.GetStream(FileMode.Create)) + { + response.Content.CopyTo(outputStream); + + outputStream.Seek(0L, SeekOrigin.Begin); + originalSize = GetImageSize(outputStream); + } + + return new HtmlImageInfo(src, hostingPart.GetIdOfPart(ipart)) { + TypeInfo = type, + Size = originalSize + }; + } + } + + /// + /// Parse the Data inline image. + /// + private HtmlImageInfo? 
ReadDataUri(string src) + { + if (DataUri.TryCreate(src, out var dataUri)) + { + Size originalSize; + knownContentType.TryGetValue(dataUri!.Mime, out PartTypeInfo type); + var ipart = hostingPart.AddImagePart(type); + using (var outputStream = ipart.GetStream(FileMode.Create)) + { + outputStream.Write(dataUri.Data, 0, dataUri.Data.Length); + + outputStream.Seek(0L, SeekOrigin.Begin); + originalSize = GetImageSize(outputStream); + } + + return new HtmlImageInfo(src, hostingPart.GetIdOfPart(ipart)) { + TypeInfo = type, + Size = originalSize + }; + } + + return null; + } + + //____________________________________________________________________ + // + // Private Implementation + + // http://stackoverflow.com/questions/58510/using-net-how-can-you-find-the-mime-type-of-a-file-based-on-the-file-signature + private static readonly Dictionary knownContentType = new(StringComparer.OrdinalIgnoreCase) { + { "image/gif", ImagePartType.Gif }, + { "image/pjpeg", ImagePartType.Jpeg }, + { "image/jp2", ImagePartType.Jp2 }, + { "image/jpg", ImagePartType.Jpeg }, + { "image/jpeg", ImagePartType.Jpeg }, + { "image/x-png", ImagePartType.Png }, + { "image/png", ImagePartType.Png }, + { "image/tiff", ImagePartType.Tiff }, + { "image/emf", ImagePartType.Emf }, + { "image/x-emf", ImagePartType.Emf }, + { "image/vnd.microsoft.icon", ImagePartType.Icon }, + // these icons mime type are wrong but we should nevertheless take care (http://en.wikipedia.org/wiki/ICO_%28file_format%29#MIME_type) + { "image/x-icon", ImagePartType.Icon }, + { "image/icon", ImagePartType.Icon }, + { "image/ico", ImagePartType.Icon }, + { "text/ico", ImagePartType.Icon }, + { "text/application-ico", ImagePartType.Icon }, + { "image/bmp", ImagePartType.Bmp }, + { "image/svg+xml", ImagePartType.Svg }, + }; + + /// + /// Inspect the response headers of a web request and decode the mime type if provided + /// + /// Returns the extension of the image if provideds. + private static bool TryInspectMimeType(string? 
contentType, out PartTypeInfo type) + { + // can be null when the protocol used doesn't allow response headers + if (contentType != null && + knownContentType.TryGetValue(contentType, out type)) + return true; + + type = default; + return false; + } + + /// + /// Gets the OpenXml PartTypeInfo associated to an image. + /// + private static bool TryGuessTypeFromUri(Uri uri, out PartTypeInfo type) + { + string extension = Path.GetExtension(uri.IsAbsoluteUri ? uri.Segments[uri.Segments.Length - 1] : uri.OriginalString); + if (knownExtensions.TryGetValue(extension, out type)) return true; + + // extension not recognized, try with checking the query string. Expecting to resolve something like: + // ./image.axd?picture=img1.jpg + extension = Path.GetExtension(uri.IsAbsoluteUri ? uri.AbsoluteUri : uri.ToString()); + if (knownExtensions.TryGetValue(extension, out type)) return true; + + return false; + } + + /// + /// Gets the OpenXml PartTypeInfo associated to an image. + /// + private static bool TryGuessTypeFromStream(Stream stream, out PartTypeInfo type) + { + if (ImageHeader.TryDetectFileType(stream, out ImageHeader.FileType guessType)) + { + switch (guessType) + { + case ImageHeader.FileType.Bitmap: type = ImagePartType.Bmp; return true; + case ImageHeader.FileType.Emf: type = ImagePartType.Emf; return true; + case ImageHeader.FileType.Gif: type = ImagePartType.Gif; return true; + case ImageHeader.FileType.Jpeg: type = ImagePartType.Jpeg; return true; + case ImageHeader.FileType.Png: type = ImagePartType.Png; return true; + } + } + type = ImagePartType.Bmp; + return false; + } + + /// + /// Loads an image from a stream and grab its size. 
+ /// + private static Size GetImageSize(Stream imageStream) + { + // Read only the size of the image + try + { + return ImageHeader.GetDimensions(imageStream); + } + catch (ArgumentException) + { + return Size.Empty; + } + } +} diff --git a/src/Html2OpenXml/Primitives/HtmlColor.Named.cs b/src/Html2OpenXml/Primitives/HtmlColor.Named.cs new file mode 100644 index 00000000..37a42fb9 --- /dev/null +++ b/src/Html2OpenXml/Primitives/HtmlColor.Named.cs @@ -0,0 +1,180 @@ +using System; +#if NET5_0_OR_GREATER +using System.Collections.Frozen; +#endif +using System.Collections.Generic; + +namespace HtmlToOpenXml; + +/// +/// Helper class to translate a named color to its ARGB representation. +/// +partial struct HtmlColor +{ + private static readonly IReadOnlyDictionary namedColors = InitKnownColors(); + + private static HtmlColor GetNamedColor (ReadOnlySpan name) + { + // the longest built-in Color's name is much lower than this check, so we should not allocate here in a typical usage + Span loweredValue = name.Length <= 128 ? 
/// <summary>
/// Builds the lookup table that maps a named HTML/CSS color to its ARGB value.
/// </summary>
/// <remarks>
/// Every key MUST be written in lowercase: the table uses the default (case-sensitive)
/// string comparer and the named-color lookup lowercases the requested name with
/// <c>ToLowerInvariant</c> before querying the table. A key containing an uppercase
/// letter or a typo can therefore never be matched.
/// RGB components follow the CSS named-color table.
/// </remarks>
/// <returns>A read-only dictionary keyed by the lowercase color name.</returns>
private static IReadOnlyDictionary<string, HtmlColor> InitKnownColors()
{
    var colors = new Dictionary<string, HtmlColor>()
    {
        { "black", Black },
        { "white", FromArgb(255, 255, 255) },
        { "aliceblue", FromArgb(240, 248, 255) },
        { "lightsalmon", FromArgb(255, 160, 122) },
        { "antiquewhite", FromArgb(250, 235, 215) },
        { "lightseagreen", FromArgb(32, 178, 170) },
        { "aqua", FromArgb(0, 255, 255) },
        { "lightskyblue", FromArgb(135, 206, 250) },
        { "aquamarine", FromArgb(127, 255, 212) },
        { "lightslategray", FromArgb(119, 136, 153) },
        { "azure", FromArgb(240, 255, 255) },
        { "lightsteelblue", FromArgb(176, 196, 222) },
        { "beige", FromArgb(245, 245, 220) },
        { "lightyellow", FromArgb(255, 255, 224) },
        { "bisque", FromArgb(255, 228, 196) },
        { "lime", FromArgb(0, 255, 0) },
        { "limegreen", FromArgb(50, 205, 50) },
        { "blanchedalmond", FromArgb(255, 235, 205) }, // #FFEBCD (was 255, 255, 205)
        { "linen", FromArgb(250, 240, 230) },
        { "blue", FromArgb(0, 0, 255) },
        { "magenta", FromArgb(255, 0, 255) },
        { "blueviolet", FromArgb(138, 43, 226) },
        { "maroon", FromArgb(128, 0, 0) },
        { "brown", FromArgb(165, 42, 42) },
        { "mediumaquamarine", FromArgb(102, 205, 170) },
        { "burlywood", FromArgb(222, 184, 135) },
        { "mediumblue", FromArgb(0, 0, 205) },
        { "cadetblue", FromArgb(95, 158, 160) },
        { "mediumorchid", FromArgb(186, 85, 211) }, // key was misspelled "mediumprchid"
        { "chartreuse", FromArgb(127, 255, 0) },
        { "mediumpurple", FromArgb(147, 112, 219) },
        { "chocolate", FromArgb(210, 105, 30) },
        { "mediumseagreen", FromArgb(60, 179, 113) },
        { "coral", FromArgb(255, 127, 80) },
        { "mediumslateblue", FromArgb(123, 104, 238) },
        { "cornflowerblue", FromArgb(100, 149, 237) },
        { "mediumspringgreen", FromArgb(0, 250, 154) }, // key was misspelled "mediumspringbreen"
        { "cornsilk", FromArgb(255, 248, 220) },
        { "mediumturquoise", FromArgb(72, 209, 204) },
        { "crimson", FromArgb(220, 20, 60) },
        { "mediumvioletred", FromArgb(199, 21, 133) }, // #C71585 (was 199, 21, 112)
        { "cyan", FromArgb(0, 255, 255) },
        { "midnightblue", FromArgb(25, 25, 112) },
        { "darkblue", FromArgb(0, 0, 139) },
        { "mintcream", FromArgb(245, 255, 250) },
        { "darkcyan", FromArgb(0, 139, 139) },
        { "mistyrose", FromArgb(255, 228, 225) },
        { "darkgoldenrod", FromArgb(184, 134, 11) },
        { "moccasin", FromArgb(255, 228, 181) },
        { "darkgray", FromArgb(169, 169, 169) },
        { "navajowhite", FromArgb(255, 222, 173) },
        { "darkgreen", FromArgb(0, 100, 0) },
        { "navy", FromArgb(0, 0, 128) },
        { "darkkhaki", FromArgb(189, 183, 107) },
        { "oldlace", FromArgb(253, 245, 230) },
        { "darkmagenta", FromArgb(139, 0, 139) },
        { "olive", FromArgb(128, 128, 0) },
        { "darkolivegreen", FromArgb(85, 107, 47) },
        { "olivedrab", FromArgb(107, 142, 35) }, // #6B8E23 (was 107, 142, 45)
        { "darkorange", FromArgb(255, 140, 0) },
        { "orange", FromArgb(255, 165, 0) },
        { "darkorchid", FromArgb(153, 50, 204) },
        { "orangered", FromArgb(255, 69, 0) },
        { "darkred", FromArgb(139, 0, 0) },
        { "orchid", FromArgb(218, 112, 214) },
        { "darksalmon", FromArgb(233, 150, 122) },
        { "palegoldenrod", FromArgb(238, 232, 170) },
        { "darkseagreen", FromArgb(143, 188, 143) },
        { "palegreen", FromArgb(152, 251, 152) },
        { "darkslateblue", FromArgb(72, 61, 139) },
        { "paleturquoise", FromArgb(175, 238, 238) },
        { "darkslategray", FromArgb(47, 79, 79) }, // #2F4F4F (was 40, 79, 79)
        { "palevioletred", FromArgb(219, 112, 147) },
        { "darkturquoise", FromArgb(0, 206, 209) },
        { "papayawhip", FromArgb(255, 239, 213) },
        { "darkviolet", FromArgb(148, 0, 211) },
        { "peachpuff", FromArgb(255, 218, 185) }, // #FFDAB9 (was 255, 218, 155)
        { "deeppink", FromArgb(255, 20, 147) },
        { "peru", FromArgb(205, 133, 63) },
        { "deepskyblue", FromArgb(0, 191, 255) },
        { "pink", FromArgb(255, 192, 203) },
        { "dimgray", FromArgb(105, 105, 105) },
        { "plum", FromArgb(221, 160, 221) },
        { "dodgerblue", FromArgb(30, 144, 255) },
        { "powderblue", FromArgb(176, 224, 230) },
        { "firebrick", FromArgb(178, 34, 34) },
        { "purple", FromArgb(128, 0, 128) },
        { "floralwhite", FromArgb(255, 250, 240) },
        { "red", FromArgb(255, 0, 0) },
        { "forestgreen", FromArgb(34, 139, 34) },
        { "rosybrown", FromArgb(188, 143, 143) },
        { "fuchsia", FromArgb(255, 0, 255) }, // correct CSS spelling
        { "fuschia", FromArgb(255, 0, 255) }, // historical misspelling, kept for backward compatibility
        { "royalblue", FromArgb(65, 105, 225) },
        { "gainsboro", FromArgb(220, 220, 220) },
        { "saddlebrown", FromArgb(139, 69, 19) },
        { "ghostwhite", FromArgb(248, 248, 255) },
        { "salmon", FromArgb(250, 128, 114) },
        { "gold", FromArgb(255, 215, 0) },
        { "sandybrown", FromArgb(244, 164, 96) },
        { "goldenrod", FromArgb(218, 165, 32) },
        { "seagreen", FromArgb(46, 139, 87) },
        { "gray", FromArgb(128, 128, 128) },
        { "seashell", FromArgb(255, 245, 238) },
        { "green", FromArgb(0, 128, 0) },
        { "sienna", FromArgb(160, 82, 45) },
        { "greenyellow", FromArgb(173, 255, 47) },
        { "silver", FromArgb(192, 192, 192) },
        { "honeydew", FromArgb(240, 255, 240) },
        { "skyblue", FromArgb(135, 206, 235) },
        { "hotpink", FromArgb(255, 105, 180) },
        { "slateblue", FromArgb(106, 90, 205) },
        { "indianred", FromArgb(205, 92, 92) },
        { "slategray", FromArgb(112, 128, 144) },
        { "indigo", FromArgb(75, 0, 130) },
        { "snow", FromArgb(255, 250, 250) },
        { "ivory", FromArgb(255, 255, 240) }, // #FFFFF0 (was 255, 240, 240)
        { "springgreen", FromArgb(0, 255, 127) },
        { "khaki", FromArgb(240, 230, 140) },
        { "steelblue", FromArgb(70, 130, 180) },
        { "lavender", FromArgb(230, 230, 250) },
        { "tan", FromArgb(210, 180, 140) },
        { "lavenderblush", FromArgb(255, 240, 245) },
        { "teal", FromArgb(0, 128, 128) },
        { "lawngreen", FromArgb(124, 252, 0) },
        { "thistle", FromArgb(216, 191, 216) },
        { "lemonchiffon", FromArgb(255, 250, 205) },
        { "tomato", FromArgb(255, 99, 71) }, // #FF6347 (was 253, 99, 71)
        { "lightblue", FromArgb(173, 216, 230) },
        { "turquoise", FromArgb(64, 224, 208) },
        { "lightcoral", FromArgb(240, 128, 128) },
        { "violet", FromArgb(238, 130, 238) },
        { "lightcyan", FromArgb(224, 255, 255) },
        { "wheat", FromArgb(245, 222, 179) },
        { "lightgoldenrodyellow", FromArgb(250, 250, 210) },
        { "lightgreen", FromArgb(144, 238, 144) },
        { "whitesmoke", FromArgb(245, 245, 245) },
        { "lightgray", FromArgb(211, 211, 211) },
        { "yellow", FromArgb(255, 255, 0) },
        { "lightpink", FromArgb(255, 182, 193) }, // key was "Lightpink": unreachable after lowercasing
        { "yellowgreen", FromArgb(154, 205, 50) },
        { "transparent", FromArgb(0, 0, 0, 0) }
    };

    // System.Collections.Frozen ships in the framework starting with .NET 8;
    // earlier targets fall back to the plain dictionary.
#if NET8_0_OR_GREATER
    return colors.ToFrozenDictionary();
#else
    return colors;
#endif
}
htmlColor) throw; } - return HtmlColorTranslator.FromHtml(htmlColor); + return GetNamedColor(htmlColor.AsSpan()); } /// diff --git a/src/Html2OpenXml/Utilities/CollectionExtensions.cs b/src/Html2OpenXml/Utilities/CollectionExtensions.cs index b66fb959..42aa8d2a 100644 --- a/src/Html2OpenXml/Utilities/CollectionExtensions.cs +++ b/src/Html2OpenXml/Utilities/CollectionExtensions.cs @@ -38,7 +38,7 @@ public static Task ForEachAsync(this IEnumerable source, var throttler = new SemaphoreSlim(initialCount: Math.Max(1, parallelOptions.MaxDegreeOfParallelism)); var tasks = System.Linq.Enumerable.Select(source, async item => { - await throttler.WaitAsync(parallelOptions.CancellationToken); + await throttler.WaitAsync(parallelOptions.CancellationToken).ConfigureAwait(false); if (parallelOptions.CancellationToken.IsCancellationRequested) return; try diff --git a/src/Html2OpenXml/Utilities/HtmlColorTranslator.cs b/src/Html2OpenXml/Utilities/HtmlColorTranslator.cs deleted file mode 100755 index ff59cb92..00000000 --- a/src/Html2OpenXml/Utilities/HtmlColorTranslator.cs +++ /dev/null @@ -1,168 +0,0 @@ -using System; -using System.Collections.Generic; - -namespace HtmlToOpenXml; - -/// -/// Helper class to translate a named color to its ARGB representation. 
/// <summary>
/// Helper class to translate a named color to its ARGB representation.
/// </summary>
static class HtmlColorTranslator
{
    // Case-insensitive table of the named colors, built once on first use of the class.
    private static readonly Dictionary<string, HtmlColor> namedColors = InitKnownColors();

    /// <summary>
    /// Looks up a named color, ignoring case.
    /// </summary>
    /// <param name="htmlColor">The color name, such as <c>red</c> or <c>LightBlue</c>.</param>
    /// <returns>The matching color, or the default <see cref="HtmlColor"/> value when the name is unknown.</returns>
    public static HtmlColor FromHtml (string htmlColor)
    {
        namedColors.TryGetValue(htmlColor, out var color);
        return color;
    }

    /// <summary>
    /// Builds the table of named colors. RGB components follow the CSS named-color table.
    /// </summary>
    private static Dictionary<string, HtmlColor> InitKnownColors()
    {
        var colors = new Dictionary<string, HtmlColor>(StringComparer.OrdinalIgnoreCase)
        {
            { "Black", HtmlColor.Black },
            { "White", HtmlColor.FromArgb(255, 255, 255) },
            { "AliceBlue", HtmlColor.FromArgb(240, 248, 255) },
            { "LightSalmon", HtmlColor.FromArgb(255, 160, 122) },
            { "AntiqueWhite", HtmlColor.FromArgb(250, 235, 215) },
            { "LightSeaGreen", HtmlColor.FromArgb(32, 178, 170) },
            { "Aqua", HtmlColor.FromArgb(0, 255, 255) },
            { "LightSkyBlue", HtmlColor.FromArgb(135, 206, 250) },
            { "Aquamarine", HtmlColor.FromArgb(127, 255, 212) },
            { "LightSlateGray", HtmlColor.FromArgb(119, 136, 153) },
            { "Azure", HtmlColor.FromArgb(240, 255, 255) },
            { "LightSteelBlue", HtmlColor.FromArgb(176, 196, 222) },
            { "Beige", HtmlColor.FromArgb(245, 245, 220) },
            { "LightYellow", HtmlColor.FromArgb(255, 255, 224) },
            { "Bisque", HtmlColor.FromArgb(255, 228, 196) },
            { "Lime", HtmlColor.FromArgb(0, 255, 0) },
            { "LimeGreen", HtmlColor.FromArgb(50, 205, 50) },
            { "BlanchedAlmond", HtmlColor.FromArgb(255, 235, 205) }, // #FFEBCD (was 255, 255, 205)
            { "Linen", HtmlColor.FromArgb(250, 240, 230) },
            { "Blue", HtmlColor.FromArgb(0, 0, 255) },
            { "Magenta", HtmlColor.FromArgb(255, 0, 255) },
            { "BlueViolet", HtmlColor.FromArgb(138, 43, 226) },
            { "Maroon", HtmlColor.FromArgb(128, 0, 0) },
            { "Brown", HtmlColor.FromArgb(165, 42, 42) },
            { "MediumAquamarine", HtmlColor.FromArgb(102, 205, 170) },
            { "BurlyWood", HtmlColor.FromArgb(222, 184, 135) },
            { "MediumBlue", HtmlColor.FromArgb(0, 0, 205) },
            { "CadetBlue", HtmlColor.FromArgb(95, 158, 160) },
            { "MediumOrchid", HtmlColor.FromArgb(186, 85, 211) },
            { "Chartreuse", HtmlColor.FromArgb(127, 255, 0) },
            { "MediumPurple", HtmlColor.FromArgb(147, 112, 219) },
            { "Chocolate", HtmlColor.FromArgb(210, 105, 30) },
            { "MediumSeaGreen", HtmlColor.FromArgb(60, 179, 113) },
            { "Coral", HtmlColor.FromArgb(255, 127, 80) },
            { "MediumSlateBlue", HtmlColor.FromArgb(123, 104, 238) },
            { "CornflowerBlue", HtmlColor.FromArgb(100, 149, 237) },
            { "MediumSpringGreen", HtmlColor.FromArgb(0, 250, 154) },
            { "Cornsilk", HtmlColor.FromArgb(255, 248, 220) },
            { "MediumTurquoise", HtmlColor.FromArgb(72, 209, 204) },
            { "Crimson", HtmlColor.FromArgb(220, 20, 60) },
            { "MediumVioletRed", HtmlColor.FromArgb(199, 21, 133) }, // #C71585 (was 199, 21, 112)
            { "Cyan", HtmlColor.FromArgb(0, 255, 255) },
            { "MidnightBlue", HtmlColor.FromArgb(25, 25, 112) },
            { "DarkBlue", HtmlColor.FromArgb(0, 0, 139) },
            { "MintCream", HtmlColor.FromArgb(245, 255, 250) },
            { "DarkCyan", HtmlColor.FromArgb(0, 139, 139) },
            { "MistyRose", HtmlColor.FromArgb(255, 228, 225) },
            { "DarkGoldenrod", HtmlColor.FromArgb(184, 134, 11) },
            { "Moccasin", HtmlColor.FromArgb(255, 228, 181) },
            { "DarkGray", HtmlColor.FromArgb(169, 169, 169) },
            { "NavajoWhite", HtmlColor.FromArgb(255, 222, 173) },
            { "DarkGreen", HtmlColor.FromArgb(0, 100, 0) },
            { "Navy", HtmlColor.FromArgb(0, 0, 128) },
            { "DarkKhaki", HtmlColor.FromArgb(189, 183, 107) },
            { "OldLace", HtmlColor.FromArgb(253, 245, 230) },
            { "DarkMagenta", HtmlColor.FromArgb(139, 0, 139) },
            { "Olive", HtmlColor.FromArgb(128, 128, 0) },
            { "DarkOliveGreen", HtmlColor.FromArgb(85, 107, 47) },
            { "OliveDrab", HtmlColor.FromArgb(107, 142, 35) }, // #6B8E23 (was 107, 142, 45)
            { "DarkOrange", HtmlColor.FromArgb(255, 140, 0) },
            { "Orange", HtmlColor.FromArgb(255, 165, 0) },
            { "DarkOrchid", HtmlColor.FromArgb(153, 50, 204) },
            { "OrangeRed", HtmlColor.FromArgb(255, 69, 0) },
            { "DarkRed", HtmlColor.FromArgb(139, 0, 0) },
            { "Orchid", HtmlColor.FromArgb(218, 112, 214) },
            { "DarkSalmon", HtmlColor.FromArgb(233, 150, 122) },
            { "PaleGoldenrod", HtmlColor.FromArgb(238, 232, 170) },
            { "DarkSeaGreen", HtmlColor.FromArgb(143, 188, 143) },
            { "PaleGreen", HtmlColor.FromArgb(152, 251, 152) },
            { "DarkSlateBlue", HtmlColor.FromArgb(72, 61, 139) },
            { "PaleTurquoise", HtmlColor.FromArgb(175, 238, 238) },
            { "DarkSlateGray", HtmlColor.FromArgb(47, 79, 79) }, // #2F4F4F (was 40, 79, 79)
            { "PaleVioletRed", HtmlColor.FromArgb(219, 112, 147) },
            { "DarkTurquoise", HtmlColor.FromArgb(0, 206, 209) },
            { "PapayaWhip", HtmlColor.FromArgb(255, 239, 213) },
            { "DarkViolet", HtmlColor.FromArgb(148, 0, 211) },
            { "PeachPuff", HtmlColor.FromArgb(255, 218, 185) }, // #FFDAB9 (was 255, 218, 155)
            { "DeepPink", HtmlColor.FromArgb(255, 20, 147) },
            { "Peru", HtmlColor.FromArgb(205, 133, 63) },
            { "DeepSkyBlue", HtmlColor.FromArgb(0, 191, 255) },
            { "Pink", HtmlColor.FromArgb(255, 192, 203) },
            { "DimGray", HtmlColor.FromArgb(105, 105, 105) },
            { "Plum", HtmlColor.FromArgb(221, 160, 221) },
            { "DodgerBlue", HtmlColor.FromArgb(30, 144, 255) },
            { "PowderBlue", HtmlColor.FromArgb(176, 224, 230) },
            { "Firebrick", HtmlColor.FromArgb(178, 34, 34) },
            { "Purple", HtmlColor.FromArgb(128, 0, 128) },
            { "FloralWhite", HtmlColor.FromArgb(255, 250, 240) },
            { "Red", HtmlColor.FromArgb(255, 0, 0) },
            { "ForestGreen", HtmlColor.FromArgb(34, 139, 34) },
            { "RosyBrown", HtmlColor.FromArgb(188, 143, 143) },
            { "Fuchsia", HtmlColor.FromArgb(255, 0, 255) }, // correct CSS spelling
            { "Fuschia", HtmlColor.FromArgb(255, 0, 255) }, // historical misspelling, kept for backward compatibility
            { "RoyalBlue", HtmlColor.FromArgb(65, 105, 225) },
            { "Gainsboro", HtmlColor.FromArgb(220, 220, 220) },
            { "SaddleBrown", HtmlColor.FromArgb(139, 69, 19) },
            { "GhostWhite", HtmlColor.FromArgb(248, 248, 255) },
            { "Salmon", HtmlColor.FromArgb(250, 128, 114) },
            { "Gold", HtmlColor.FromArgb(255, 215, 0) },
            { "SandyBrown", HtmlColor.FromArgb(244, 164, 96) },
            { "Goldenrod", HtmlColor.FromArgb(218, 165, 32) },
            { "SeaGreen", HtmlColor.FromArgb(46, 139, 87) },
            { "Gray", HtmlColor.FromArgb(128, 128, 128) },
            { "Seashell", HtmlColor.FromArgb(255, 245, 238) },
            { "Green", HtmlColor.FromArgb(0, 128, 0) },
            { "Sienna", HtmlColor.FromArgb(160, 82, 45) },
            { "GreenYellow", HtmlColor.FromArgb(173, 255, 47) },
            { "Silver", HtmlColor.FromArgb(192, 192, 192) },
            { "Honeydew", HtmlColor.FromArgb(240, 255, 240) },
            { "SkyBlue", HtmlColor.FromArgb(135, 206, 235) },
            { "HotPink", HtmlColor.FromArgb(255, 105, 180) },
            { "SlateBlue", HtmlColor.FromArgb(106, 90, 205) },
            { "IndianRed", HtmlColor.FromArgb(205, 92, 92) },
            { "SlateGray", HtmlColor.FromArgb(112, 128, 144) },
            { "Indigo", HtmlColor.FromArgb(75, 0, 130) },
            { "Snow", HtmlColor.FromArgb(255, 250, 250) },
            { "Ivory", HtmlColor.FromArgb(255, 255, 240) }, // #FFFFF0 (was 255, 240, 240)
            { "SpringGreen", HtmlColor.FromArgb(0, 255, 127) },
            { "Khaki", HtmlColor.FromArgb(240, 230, 140) },
            { "SteelBlue", HtmlColor.FromArgb(70, 130, 180) },
            { "Lavender", HtmlColor.FromArgb(230, 230, 250) },
            { "Tan", HtmlColor.FromArgb(210, 180, 140) },
            { "LavenderBlush", HtmlColor.FromArgb(255, 240, 245) },
            { "Teal", HtmlColor.FromArgb(0, 128, 128) },
            { "LawnGreen", HtmlColor.FromArgb(124, 252, 0) },
            { "Thistle", HtmlColor.FromArgb(216, 191, 216) },
            { "LemonChiffon", HtmlColor.FromArgb(255, 250, 205) },
            { "Tomato", HtmlColor.FromArgb(255, 99, 71) }, // #FF6347 (was 253, 99, 71)
            { "LightBlue", HtmlColor.FromArgb(173, 216, 230) },
            { "Turquoise", HtmlColor.FromArgb(64, 224, 208) },
            { "LightCoral", HtmlColor.FromArgb(240, 128, 128) },
            { "Violet", HtmlColor.FromArgb(238, 130, 238) },
            { "LightCyan", HtmlColor.FromArgb(224, 255, 255) },
            { "Wheat", HtmlColor.FromArgb(245, 222, 179) },
            { "LightGoldenrodYellow", HtmlColor.FromArgb(250, 250, 210) },
            { "LightGreen", HtmlColor.FromArgb(144, 238, 144) },
            { "WhiteSmoke", HtmlColor.FromArgb(245, 245, 245) },
            { "LightGray", HtmlColor.FromArgb(211, 211, 211) },
            { "Yellow", HtmlColor.FromArgb(255, 255, 0) },
            { "LightPink", HtmlColor.FromArgb(255, 182, 193) },
            { "YellowGreen", HtmlColor.FromArgb(154, 205, 50) },
            { "Transparent", HtmlColor.FromArgb(0, 0, 0, 0) }
        };

        return colors;
    }
}
2f65ad1f..7a6cb88d 100644 --- a/test/HtmlToOpenXml.Tests/AbbrTests.cs +++ b/test/HtmlToOpenXml.Tests/AbbrTests.cs @@ -175,8 +175,9 @@ public void InsideParagraph_ReturnsMultipleRuns() Assert.That(elements, Has.Count.EqualTo(1)); Assert.Multiple(() => { Assert.That(elements[0], Is.TypeOf(typeof(Paragraph))); - Assert.That(elements[0].Elements().Count(), Is.EqualTo(6), "3 textual runs + 3 breaks"); + Assert.That(elements[0].Elements().Count(), Is.EqualTo(4), "4 runs: Pre abbr, abbr, quote number, post abbr"); Assert.That(elements[0].Elements().Any(r => r.HasChild()), Is.True); + Assert.That(elements[0].InnerText, Is.EqualTo("The NASA is an independent agency of the U.S. federal government responsible for the civil space program, aeronautics research, and space research.")); }); } diff --git a/test/HtmlToOpenXml.Tests/ElementTests.cs b/test/HtmlToOpenXml.Tests/ElementTests.cs index cb2f55c6..a82685e3 100644 --- a/test/HtmlToOpenXml.Tests/ElementTests.cs +++ b/test/HtmlToOpenXml.Tests/ElementTests.cs @@ -28,7 +28,6 @@ public void PhrasingTag_ReturnsRunWithDefaultStyle (string html) where T : Op [TestCase(@"Superscript", ExpectedResult = "superscript")] public string? 
SubSup_ReturnsRunWithVerticalAlignment (string html) { - //var val = new VerticalPositionValues(tagName); var textAlign = ParsePhrasing(html); Assert.That(textAlign.Val?.HasValue, Is.True); return textAlign.Val.InnerText; @@ -154,9 +153,32 @@ public void FigCaption_ReturnsRunWithSimpleField() Assert.Multiple(() => { - Assert.That(elements[0].ChildElements, Has.Count.EqualTo(3)); Assert.That(elements[0].HasChild(), Is.True); Assert.That(elements[0].HasChild(), Is.True); + Assert.That(elements[0].Elements().Count(), Is.EqualTo(3)); + Assert.That(elements[0].GetFirstChild()!.KeepNext, Is.Null); + Assert.That(elements[0].GetFirstChild()!.Instruction?.Value, Does.Contain("SEQ Figure \\* ARABIC")); + }); + } + + [Test] + public void FigCaption_WithHeading_ReturnsParagraphWithSimpleField() + { + var elements = converter.Parse(@"
    +

    Puppy School

    +

    Championship Class of 2016

    +
    "); + + Assert.That(elements, Has.Count.EqualTo(2)); + Assert.That(elements, Is.All.TypeOf()); + + Assert.Multiple(() => + { + Assert.That(elements[0].HasChild(), Is.True); + Assert.That(elements[0].HasChild(), Is.True); + Assert.That(elements[0].GetFirstChild()!.KeepNext, Is.Not.Null); + Assert.That(elements[1].HasChild(), Is.True); + Assert.That(elements[1].HasChild(), Is.False); }); } diff --git a/test/HtmlToOpenXml.Tests/HeaderFooterTests.cs b/test/HtmlToOpenXml.Tests/HeaderFooterTests.cs index 8bb71936..c680f685 100644 --- a/test/HtmlToOpenXml.Tests/HeaderFooterTests.cs +++ b/test/HtmlToOpenXml.Tests/HeaderFooterTests.cs @@ -148,9 +148,17 @@ await converter.ParseFooter(@" var footer = mainPart.FooterParts.FirstOrDefault()?.Footer; Assert.That(footer, Is.Not.Null); var paragraphs = footer.Elements(); - Assert.That(paragraphs.Count(), Is.EqualTo(2)); - Assert.That(paragraphs.Select(p => p.ParagraphProperties?.ParagraphStyleId?.Val?.Value), - Has.All.EqualTo(converter.HtmlStyles.DefaultStyles.FooterStyle)); + Assert.That(paragraphs.Count(), Is.EqualTo(1)); + var paragraph = paragraphs.First(); + Assert.Multiple(() => + { + Assert.That(paragraph.Elements().Count, Is.EqualTo(2), "One whitespace and one for "); + Assert.That(paragraph.Elements().Count, Is.EqualTo(1)); + Assert.That(paragraph.GetFirstChild()?.InnerText, Is.EqualTo("Copyrighted but you can use what's here as long as you credit me")); + Assert.That(paragraph.GetLastChild()?.InnerText, Is.EqualTo("© Copyright 2058, Company Inc.")); + Assert.That(paragraphs.Select(p => p.ParagraphProperties?.ParagraphStyleId?.Val?.Value), + Has.All.EqualTo(converter.HtmlStyles.DefaultStyles.FooterStyle)); + }); } } } \ No newline at end of file diff --git a/test/HtmlToOpenXml.Tests/HeadingTests.cs b/test/HtmlToOpenXml.Tests/HeadingTests.cs index b8feed11..532befcd 100644 --- a/test/HtmlToOpenXml.Tests/HeadingTests.cs +++ b/test/HtmlToOpenXml.Tests/HeadingTests.cs @@ -101,5 +101,27 @@ public void 
MaxLevel_ShouldBeIgnored() Is.Null, $"Only {maxLevel+1} levels of heading supported"); }); } + + [Test(Description = "Heading with number but no text should be ignored (issue #189)")] + public void NumberingWithNoTextPattern_ReturnsSimpleHeading() + { + var elements = converter.Parse("

    00

    "); + + var absNum = mainPart.NumberingDefinitionsPart?.Numbering + .Elements() + .Where(abs => abs.AbstractNumDefinitionName?.Val == NumberingExpressionBase.HeadingNumberingName) + .SingleOrDefault(); + Assert.That(absNum, Is.Null); + + var paragraphs = elements.Cast(); + Assert.Multiple(() => + { + Assert.That(paragraphs.Count(), Is.EqualTo(1)); + Assert.That(paragraphs.First().InnerText, Is.EqualTo("00")); + Assert.That(paragraphs.First().ParagraphProperties?.NumberingProperties?.NumberingLevelReference?.Val, + Is.Null, + "First paragraph is not a numbering"); + }); + } } } \ No newline at end of file diff --git a/test/HtmlToOpenXml.Tests/HtmlToOpenXml.Tests.csproj b/test/HtmlToOpenXml.Tests/HtmlToOpenXml.Tests.csproj index e781a424..31c2bd53 100755 --- a/test/HtmlToOpenXml.Tests/HtmlToOpenXml.Tests.csproj +++ b/test/HtmlToOpenXml.Tests/HtmlToOpenXml.Tests.csproj @@ -12,15 +12,15 @@ en - + runtime; build; native; contentfiles; analyzers; buildtransitive all - + - - - + + + all runtime; build; native; contentfiles; analyzers diff --git a/test/HtmlToOpenXml.Tests/ImageFormats/ImageHeaderTests.cs b/test/HtmlToOpenXml.Tests/ImageFormats/ImageHeaderTests.cs index 8dabcd50..2c197f67 100644 --- a/test/HtmlToOpenXml.Tests/ImageFormats/ImageHeaderTests.cs +++ b/test/HtmlToOpenXml.Tests/ImageFormats/ImageHeaderTests.cs @@ -63,5 +63,15 @@ public ImageHeader.FileType GuessFormat_ReturnsFileType(string resourceName) Assert.That(success, Is.EqualTo(true)); return guessType; } + + [Test(ExpectedResult = ImageHeader.FileType.Unrecognized)] + public ImageHeader.FileType GuessFormat_WithEmpty_ReturnsFileType() + { + using var memoryStream = new MemoryStream(); + bool success = ImageHeader.TryDetectFileType(memoryStream, out var guessType); + + Assert.That(success, Is.EqualTo(false)); + return guessType; + } } } \ No newline at end of file diff --git a/test/HtmlToOpenXml.Tests/ImgTests.cs b/test/HtmlToOpenXml.Tests/ImgTests.cs index 45aa66dd..7bd01cb8 100644 --- 
a/test/HtmlToOpenXml.Tests/ImgTests.cs +++ b/test/HtmlToOpenXml.Tests/ImgTests.cs @@ -3,6 +3,8 @@ using DocumentFormat.OpenXml.Packaging; using DocumentFormat.OpenXml.Wordprocessing; using Moq; +using System.Net.Http; +using System.Net.Http.Headers; namespace HtmlToOpenXml.Tests { @@ -16,12 +18,26 @@ namespace HtmlToOpenXml.Tests public class ImgTests : HtmlConverterTestBase { [TestCase("https://www.w3schools.com/tags/smiley.gif", "image/gif")] - [TestCase("https://dev.w3.org/SVG/tools/svgweb/samples/svg-files/helloworld.svg", "image/svg+xml")] - public void AbsoluteUri_ReturnsDrawing_WithDownloadedData(string imageUri, string contentType) + [TestCase("https://upload.wikimedia.org/wikipedia/commons/b/b0/Mozilla_dinosaur_head_logo.svg", "image/svg+xml")] + public async Task AbsoluteUri_ReturnsDrawing_WithDownloadedData(string imageUri, string contentType) { - var elements = converter.Parse(@$"Smiley face"); - Assert.That(elements, Has.Count.EqualTo(1)); - var (_, imagePart) = AssertIsImg(mainPart, elements[0]); + var mockHttp = new MockHttpMessageHandler(uri => Task.FromResult(new HttpResponseMessage { + StatusCode = System.Net.HttpStatusCode.OK, + Content = new StreamContent(ResourceHelper.GetStream("Resources." 
+ Path.GetFileName(imageUri))) { + Headers = { { "Content-Type", contentType } } + } + })); + + var webRequest = new IO.DefaultWebRequest(new HttpClient(mockHttp)); + converter = new HtmlConverter(mainPart, webRequest); + + await converter.ParseBody( + @$"", + TestContext.CurrentContext.CancellationToken); + + var paragraphs = mainPart.Document.Body!.Elements(); + Assert.That(paragraphs.Count(), Is.EqualTo(1)); + var (_, imagePart) = AssertIsImg(mainPart, paragraphs.First()); Assert.That(imagePart.ContentType, Is.EqualTo(contentType)); } @@ -36,7 +52,7 @@ public void DataUri_ReturnsDrawing_WithDecryptedData() [Test] public void WithBorder_ReturnsRunWithBorder() { - var elements = converter.Parse(@""); + var elements = converter.Parse(@""); AssertIsImg(mainPart, elements[0]); var run = elements[0].GetFirstChild(); var runProperties = run?.GetFirstChild(); @@ -44,6 +60,16 @@ public void WithBorder_ReturnsRunWithBorder() Assert.That(runProperties.Border, Is.Not.Null); } + [Test] + public void PercentageSize_ReturnsDrawing_WithSizeRelativeToPage() + { + var elements = converter.Parse(@""); + AssertIsImg(mainPart, elements[0]); + var drawing = elements[0].GetFirstChild()!.GetFirstChild()!; + Assert.That(drawing.Inline?.Extent?.Cx?.Value, Is.EqualTo(6115050)); + Assert.That(drawing.Inline?.Extent?.Cy?.Value, Is.EqualTo(6115050)); + } + [Test] public void ManualProvisioning_ReturnsDrawing_WithProvidedData() { @@ -128,7 +154,8 @@ public async Task RemoteImage_WithBaseUri_ShouldSucceed() converter = new HtmlConverter(mainPart, new IO.DefaultWebRequest() { BaseImageUrl = new Uri("http://github.com/onizet/html2openxml") }); - var elements = await converter.ParseAsync($""); + var elements = await converter.ParseAsync($"", + TestContext.CurrentContext.CancellationToken); Assert.That(elements, Is.Not.Empty); AssertIsImg(mainPart, elements.First()); } @@ -235,6 +262,34 @@ public bool CenterImg_ReturnsFramedImg(string displayMode) Justification?.Val?.Value == 
JustificationValues.Center; } + [Test] + public async Task DuplicateImgSource__DownloadOnce() + { + var webRequest = new Mock(); + webRequest.Setup(x => x.FetchAsync(It.IsAny(), It.IsAny())) + .Returns(Task.FromResult(new() { + Content = new MemoryStream(Convert.FromBase64String(@"/9j/4AAQSkZJRgABAQAAAQABAAD/4gKgSUNDX1BST0ZJTEUAAQEAAAKQbGNtcwQwAABtbnRyUkdCIFhZWiAH4QAHAAEAAAABAAZhY3NwQVBQTAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA9tYAAQAAAADTLWxjbXMAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAtkZXNjAAABCAAAADhjcHJ0AAABQAAAAE53dHB0AAABkAAAABRjaGFkAAABpAAAACxyWFlaAAAB0AAAABRiWFlaAAAB5AAAABRnWFlaAAAB+AAAABRyVFJDAAACDAAAACBnVFJDAAACLAAAACBiVFJDAAACTAAAACBjaHJtAAACbAAAACRtbHVjAAAAAAAAAAEAAAAMZW5VUwAAABwAAAAcAHMAUgBHAEIAIABiAHUAaQBsAHQALQBpAG4AAG1sdWMAAAAAAAAAAQAAAAxlblVTAAAAMgAAABwATgBvACAAYwBvAHAAeQByAGkAZwBoAHQALAAgAHUAcwBlACAAZgByAGUAZQBsAHkAAAAAWFlaIAAAAAAAAPbWAAEAAAAA0y1zZjMyAAAAAAABDEoAAAXj///zKgAAB5sAAP2H///7ov///aMAAAPYAADAlFhZWiAAAAAAAABvlAAAOO4AAAOQWFlaIAAAAAAAACSdAAAPgwAAtr5YWVogAAAAAAAAYqUAALeQAAAY3nBhcmEAAAAAAAMAAAACZmYAAPKnAAANWQAAE9AAAApbcGFyYQAAAAAAAwAAAAJmZgAA8qcAAA1ZAAAT0AAACltwYXJhAAAAAAADAAAAAmZmAADypwAADVkAABPQAAAKW2Nocm0AAAAAAAMAAAAAo9cAAFR7AABMzQAAmZoAACZmAAAPXP/bAEMABQMEBAQDBQQEBAUFBQYHDAgHBwcHDwsLCQwRDxISEQ8RERMWHBcTFBoVEREYIRgaHR0fHx8TFyIkIh4kHB4fHv/bAEMBBQUFBwYHDggIDh4UERQeHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHv/CABEIAX4BfgMBIgACEQEDEQH/xAAcAAEBAAIDAQEAAAAAAAAAAAAAAQcIBAUGAgP/xAAUAQEAAAAAAAAAAAAAAAAAAAAA/9oADAMBAAIQAxAAAAHMoAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADxmKzYPj6q8c22/fUTuzaFh3KZzwAAAAAAAAAAAAAAAAAAflhJjUlAlIonZ9ZTZH1epuyB6IAAAAAAAAAAAAAAAADxvstezwv1BYFlhYFIX1PlRt19+O9iAAAAAAAAAAAAAAAAfOpu0+px9AlCUIsFCKMrZmwFns+gAAAAAAAAAAAAAAAfhqXt3rAdDYKgsCoKQqDI2dcUZYKAAAAAAAAAAAAAAABiPLnENTna9UShKEUATk8fJ5kn0nH5AAAAAAAAAAAAAAAAAB5jXbbDzxrHfWeSFQWBeTkg6L
OTtSgAAAAAAAAAAAAAAAAAA/Pyvrhhvp89/Jgv0OU/o873fIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAH//EACYQAAEEAQQBBAMBAAAAAAAAAAQBAgMFBgARMFASFCAxkBMVIiP/2gAIAQEAAQUC+xZXI3XqhtMmif1lvkINep+TWpOpZppdbaT+VDtrERazMF3EKgLh6Z7msbkeSSkr7684qvnoLiG1g6XMrlSJuEQiYQijso7ILo8ssf19Xx4xYOr7Ni+TeizYv1N1x7axUz1dV0Ll2QuVZy+TAZtk6E534wm/HJhDlQ5PjoJm+cPirV5MIZ/u346HIhlEuuTCh1YF0Wfg7t4xYXEEVECRRdEVBGTBbASVx3FilarUgZ4M6O/qYrQU0WcMjgoKhxDgoPBOlt6wWyhtqA0F3tGHmJfT4+jHCCozqHsa5LCjCL0TibNPxYnePF36ExoVijVzY2xxNYnV+Ka8U+xj/8QAFBEBAAAAAAAAAAAAAAAAAAAAkP/aAAgBAwEBPwEQP//EABQRAQAAAAAAAAAAAAAAAAAAAJD/2gAIAQIBAT8BED//xAA7EAABAgIGBAoJBQEAAAAAAAABAgMAEQQSITFBUCIwUXEFEBMjMlJhYpHBIDNCU3KQobHhFDSBgpLR/9oACAEBAAY/AvmLaRA3x+4Z/wBiNFxB3HLC3W5d7qN4bzBDbgoyNjYt8THOvOOnvKnF30iYmD2RzNMdl1VGY8IqcIsWe8aH3EB2jupcQcU5OVKIAFpMGjcHrU2z7TgvXu2ajlqK6UHEYK3xZoPJ6beTK4PoypMoscI9s7N2qQ+wsoWk2Ql5Nirlp2HJDyapPu6DfZtPFdqk28y7oL8jE8jU0OhRxUG+861lSjNQFVW8ZFOHnyfWLUrxOtpDPereI/GRPr6raj9NXjxvDuD75EtHWEoqm8WcV3oXem+v4RkdKawr1huNutC/erreQyNrhBA6Og55axthF6zKEpSJJQJDI1sOiaFiRhdFdwtSqXSG3V/qnEmusaPYmJZJUVouJ9WuV34g0ekIqrH13akUikIk17KZdP8AEVjfk1R9EzgrEQSlJfa6yRb/ACPSqMNKWezCA5SpOrwTgP8AsTN+UWiJrYTPaLD4xzTzqd9sWUlEvgjTpR/qiBWbW8e+fKAkJSlIwAlFgyy6LvmMf//EACkQAAIBAQcEAQUBAAAAAAAAAAABESExQVFhcZGhUIGx8OEQQMHR8ZD/2gAIAQEAAT8h/wA710aRVOqD0j8nAesp0ufpLhst7S5eQ8aTLVnWnNQO5zpvy2RwbP0T8iHAjOKvu+ptBFqSdopqvw+xYb1Zw8Hk+jpKtIcJK+pYmhQeYnLyQlFElt8FlqJImzwX/BfVF1CzGsqpYLf+Jo0NjWPWdVmsU+jNgsSzo26HL0Ikj2GU9RGa5Ix8EZ8M9sIzQlrsSBzPD7p4p4FtH1tb9aX6dEbWBzP0Vy0JRSB5CvqGpu4KwWXfRqbuCq/RTDyVHC2oosVXwPgShehva3BqXob+OxGuxuQ89jcinwVIyexuJR/Dc3KrUy0AOdroN97ehIc1xkgExGa+DQ0Njvwd+D2z6aMpA/o6QVF4DF3QaI/gGYlNXYvBGFexGTIz4Y8yJvWwlqR7BrJF8kRaIvkXcUOYcmqDTLoK2ENNbuoGzVVbUsaoyrdTRNivqIk7HZsRlwZEUEqWIrgivqHuFT9jEhV0FjQpp2p5DzBGRDuIIuggh4EQRAkQQ8CMUMUqs7FH7BWdCdpu7m633cdy8g9sKE3/AE9sL5JpaXECCKVHFive1SPsOmlC+zf2Cjb9eQrJ6U
bZa/jIIzZKIvngjEiHayUQrZVMiNSFbJZ+ErVcbV+NRKk+0X2FDU91TfB4tgMk5eiXNr08S+3wb/TRLZFt3gri/e5ZUlJWIe6Rb1ZmvT0FwvRlEVsqPxT9QiZLKYtWe6lCh2NPv8kMrd5I9n5IfrOKJtTbhd2QTGHe+eLgQpYSSp0eBIHzev8AyVTw9pfwxHuxsnw2PPvfmY3M4EbIR6akxEeR0uExttTYSbmwklYv9F//2gAMAwEAAgADAAAAEPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPOEPPPPPPPPPPPPPPPPPPKOEGPPPPPPPPPPPPPPPPPPPCAAAEPPPPPPPPPPPPPPPPPPLIAAAAEHPPPPPPPPPPPPPPPPFAAABAABPPPPPPPPPPPPPPPPBAAEAAABPPPPPPPPOPPPPPPPLCAAAAFPPPPPPPPPPHPPPPPPPHCAAAJPPPPPPPPPPPPPPPPPPPDLDPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPOPPPPPPPPPPPPPPPPPPPPPPPDPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPKPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPP/EABQRAQAAAAAAAAAAAAAAAAAAAJD/2gAIAQMBAT8QED//xAAUEQEAAAAAAAAAAAAAAAAAAACQ/9oACAECAQE/EBA//8QAKxAAAQIDBgUFAQEAAAAAAAAAAQARECExIEFQcZHwUWGB0fEwkKGx4UDB/9oACAEBAAE/EPcVFib9eIQprKgdJjoC+FDEzxCNghCi8ZAABmUKmHgw60qRAr29L1UAZaD5iCF/QZQDg4BhRWEVRQI8k9OZbJzKRuQgAobAUgFw5oQgArA6aeDWDdEmcxPW4CbDAFGouiAfaeRCGDirM/QEazHUgX4HNgh9n/WQDCADYQEjkjIaEN2JTzYnEVTVIwMiE1EnaBJQaAGgTQA0EMjIeBEXbOkYDVgVA4HKdAFxiAX/AEiDbApZAPsLiGAhzNCQEngp5EA2FZhBM1oEBT+F5gmnkwEAoLjfAY7sDmWdgA1RA2daHCFshEhnZDYEKQa73fHQjAGReR6KBAGQe7wu1KjgXLO/9RHRgPhEMAprWEOeN9Eh3QmfZr+QAU9cvFp3kiSDtswrwDYAzFsAAADSaU9+AIAZNsEGZWJHT3AhVaoEDcEKfMQAxAEM6J0yBXCYMQsCSdEkkDgACCSP9K2BPlADCLqEAB0wAs1A7DogDMHcCCA6egIcoAggieI4BucuMo4wQrApAUZBhiCGoCraShe4xMP/2Q==")), + StatusCode = System.Net.HttpStatusCode.OK + })); + converter = new HtmlConverter(mainPart, webRequest.Object); + + await converter.ParseBody(@" + + + "); + + webRequest.Verify(s => s.FetchAsync( + It.IsAny(), + It.IsAny()), + Times.Once); + Assert.That(mainPart.ImageParts.Count(), Is.EqualTo(1)); + var paragraph = mainPart.Document.Body!.GetFirstChild(); + Assert.That(paragraph, Is.Not.Null); + var runs = paragraph.Elements(); + Assert.That(runs.Count(), 
Is.EqualTo(2)); + Assert.That(runs.Select(r => r.GetFirstChild()), Has.All.Not.Null); + } + private static (Drawing, ImagePart) AssertIsImg (OpenXmlPartContainer container, OpenXmlElement paragraph) { var run = paragraph.GetFirstChild(); diff --git a/test/HtmlToOpenXml.Tests/LinkTests.cs b/test/HtmlToOpenXml.Tests/LinkTests.cs index f688bef8..3aa1e738 100644 --- a/test/HtmlToOpenXml.Tests/LinkTests.cs +++ b/test/HtmlToOpenXml.Tests/LinkTests.cs @@ -61,7 +61,7 @@ public void ImageFigcaptionLink_ReturnsHyperlinkWithTextAndImage () Assert.That(elements[0].FirstChild, Is.TypeOf(typeof(Hyperlink))); var hyperlink = (Hyperlink) elements[0].FirstChild; - Assert.That(hyperlink.ChildElements, Has.Count.EqualTo(4)); + Assert.That(hyperlink.ChildElements, Has.Count.EqualTo(6)); Assert.That(hyperlink.ChildElements, Has.All.TypeOf(typeof(Run)), "Hyperlinks don't accept inner paragraphs"); Assert.That(hyperlink.Descendants(), Is.Not.Null); } @@ -143,16 +143,15 @@ public void InlineWithText_ReturnsMultipleRunsWithHyperlink() Assert.That(elements[0], Is.TypeOf(typeof(Paragraph))); Assert.Multiple(() => { Assert.That(elements[0].ElementAt(0), Is.TypeOf()); - Assert.That(elements[0].ElementAt(1), Is.TypeOf()); - Assert.That(elements[0].ElementAt(2), Is.TypeOf()); - Assert.That(elements[0].ElementAt(3), Is.TypeOf()); + Assert.That(elements[0].ElementAt(1), Is.TypeOf()); + Assert.That(elements[0].ElementAt(2), Is.TypeOf()); }); } [Test(Description = "Many runs inside the link should respect whitespaces")] public void WithMultipleRun_ReturnsHyperlinkWithMultipleRuns() { - var elements = converter.Parse(@"Html to OpenXml!"); + var elements = converter.Parse(@"Html to OpenXml !"); Assert.That(elements, Has.Count.EqualTo(1)); Assert.That(elements[0], Is.TypeOf(typeof(Paragraph))); var h = elements[0].GetFirstChild(); diff --git a/test/HtmlToOpenXml.Tests/NumberingTests.cs b/test/HtmlToOpenXml.Tests/NumberingTests.cs index c48b6994..bd8f593c 100644 --- 
a/test/HtmlToOpenXml.Tests/NumberingTests.cs +++ b/test/HtmlToOpenXml.Tests/NumberingTests.cs @@ -549,7 +549,7 @@ await converter.ParseBody(@"
    1. Item1
    2. Item2
      1. Item 2.1
      -
    "); + ",TestContext.CurrentContext.CancellationToken); var absNum = mainPart.NumberingDefinitionsPart?.Numbering .Elements() @@ -611,5 +611,36 @@ public void NestedParagraph_ReturnsIndentedItems() "Last standalone paragraph is aligned with the level 1"); }); } + + [Test] + public void NestedTable_ReturnsAlignedTable() + { + var elements = converter.Parse(@"
      +
    1. Item 1 +
    Cell1
    +
      +
    1. Item 1.1 +
      Cell1.1
      +
    2. +
    + +
"); + + Assert.That(elements, Is.Not.Empty); + var odds = elements.Where((item, index) => index % 2 != 0); + Assert.That(odds, Has.All.TypeOf()); + for (int i = 0; i < 2; i++) + { + var table = (Table) odds.ElementAt(i); + var tableProperties = table.GetFirstChild(); + Assert.That(tableProperties, Is.Not.Null); + Assert.That(tableProperties.TableIndentation, Is.Not.Null); + Assert.That(tableProperties.TableIndentation.Width?.Value, Is.EqualTo(720 * (i+1))); + Assert.That(tableProperties.TableWidth, Is.Not.Null); + Assert.That(tableProperties.TableWidth.Type?.Value, Is.EqualTo(TableWidthUnitValues.Pct)); + Assert.That(tableProperties.TableWidth.Width?.HasValue, Is.True); + Assert.That(Convert.ToInt32(tableProperties.TableWidth.Width.Value), Is.GreaterThan(0).And.LessThan(5000)); + } + } } } \ No newline at end of file diff --git a/test/HtmlToOpenXml.Tests/ParserTests.cs b/test/HtmlToOpenXml.Tests/ParserTests.cs index a3dcd8b4..efb64094 100644 --- a/test/HtmlToOpenXml.Tests/ParserTests.cs +++ b/test/HtmlToOpenXml.Tests/ParserTests.cs @@ -39,12 +39,12 @@ public void Paragraph_WithUnclosedTags_ShouldApplyStyle() var runProperties = elements[0].ChildElements[0].GetFirstChild(); Assert.That(runProperties, Is.Null); - runProperties = elements[0].ChildElements[2].GetFirstChild(); + runProperties = elements[0].ChildElements[1].GetFirstChild(); Assert.That(runProperties, Is.Not.Null); Assert.That(runProperties.HasChild(), Is.EqualTo(true)); Assert.That(runProperties.HasChild(), Is.EqualTo(false)); - runProperties = elements[0].ChildElements[4].GetFirstChild(); + runProperties = elements[0].ChildElements[2].GetFirstChild(); Assert.That(runProperties, Is.Not.Null); Assert.That(runProperties.HasChild(), Is.EqualTo(true)); Assert.That(runProperties.HasChild(), Is.EqualTo(true)); @@ -57,14 +57,14 @@ public void ConsecutiveParagraph_WithUnclosedTags_ShouldContinueStyle() Assert.That(elements, Has.Count.EqualTo(2)); Assert.Multiple(() => { - Assert.That(elements[0].ChildElements, 
Has.Count.EqualTo(3)); - Assert.That(elements[1].ChildElements, Has.Count.EqualTo(3)); + Assert.That(elements[0].Elements().Count, Is.EqualTo(2)); + Assert.That(elements[1].Elements().Count, Is.EqualTo(2)); }); var runProperties = elements[0].ChildElements[0].GetFirstChild(); Assert.That(runProperties, Is.Null); - runProperties = elements[0].ChildElements[2].GetFirstChild(); + runProperties = elements[0].GetLastChild()!.GetFirstChild(); Assert.That(runProperties, Is.Not.Null); Assert.That(runProperties.HasChild(), Is.EqualTo(true)); @@ -73,7 +73,7 @@ public void ConsecutiveParagraph_WithUnclosedTags_ShouldContinueStyle() Assert.That(runProperties.HasChild(), Is.EqualTo(true)); Assert.That(runProperties.HasChild(), Is.EqualTo(false)); - runProperties = elements[1].ChildElements[2].GetFirstChild(); + runProperties = elements[1].GetLastChild()!.GetFirstChild(); Assert.That(runProperties, Is.Not.Null); Assert.That(runProperties.HasChild(), Is.EqualTo(true)); Assert.That(runProperties.HasChild(), Is.EqualTo(true)); @@ -85,8 +85,8 @@ public void ConsecutiveParagraph_WithClosedTags_ShouldNotContinueStyle() // this should generate a new paragraph with its own style var elements = converter.Parse("

First paragraph in italics

Second paragraph not in italic

"); Assert.That(elements, Has.Count.EqualTo(2)); - Assert.That(elements[0].ChildElements, Has.Count.EqualTo(3)); - Assert.That(elements[1].ChildElements, Has.Count.EqualTo(1)); + Assert.That(elements[0].Elements().Count, Is.EqualTo(2)); + Assert.That(elements[1].Elements().Count, Is.EqualTo(1)); Assert.That(elements[1].FirstChild, Is.TypeOf(typeof(Run))); var runProperties = elements[1].FirstChild.GetFirstChild(); @@ -94,13 +94,13 @@ public void ConsecutiveParagraph_WithClosedTags_ShouldNotContinueStyle() } [TestCase("

Some\ntext

", ExpectedResult = 1)] - [TestCase("

Some bold\ntext

", ExpectedResult = 5)] - [TestCase("\t

Some bold\ntext

", ExpectedResult = 5)] + [TestCase("

Some bold\ntext

", ExpectedResult = 3)] + [TestCase("\t

Some bold\ntext

", ExpectedResult = 3)] [TestCase("

Some text

", ExpectedResult = 1)] public int Newline_ReturnsRunCount (string html) { var elements = converter.Parse(html); - return elements[0].Count(c => c is Run); + return elements[0].Elements().Count(); } [TestCase(" < b >bold", ExpectedResult = "< b >bold")] diff --git a/test/HtmlToOpenXml.Tests/Resources/Mozilla_dinosaur_head_logo.svg b/test/HtmlToOpenXml.Tests/Resources/Mozilla_dinosaur_head_logo.svg new file mode 100644 index 00000000..7af5bd27 --- /dev/null +++ b/test/HtmlToOpenXml.Tests/Resources/Mozilla_dinosaur_head_logo.svg @@ -0,0 +1,10 @@ + + + + + + + + + + diff --git a/test/HtmlToOpenXml.Tests/Resources/The-Song-of-the-World.jpg b/test/HtmlToOpenXml.Tests/Resources/The-Song-of-the-World.jpg new file mode 100644 index 00000000..5e1d8983 Binary files /dev/null and b/test/HtmlToOpenXml.Tests/Resources/The-Song-of-the-World.jpg differ diff --git a/test/HtmlToOpenXml.Tests/Resources/smiley.gif b/test/HtmlToOpenXml.Tests/Resources/smiley.gif new file mode 100644 index 00000000..0719d8e2 Binary files /dev/null and b/test/HtmlToOpenXml.Tests/Resources/smiley.gif differ diff --git a/test/HtmlToOpenXml.Tests/StyleTests.cs b/test/HtmlToOpenXml.Tests/StyleTests.cs index 24afe765..d9228a26 100644 --- a/test/HtmlToOpenXml.Tests/StyleTests.cs +++ b/test/HtmlToOpenXml.Tests/StyleTests.cs @@ -25,7 +25,7 @@ public void UseVariantStyle_ReturnsAppliedStyle() Type = args.Type, BasedOn = new BasedOn { Val = "Normal" }, StyleRunProperties = new() { - Color = new() { Val = HtmlColorTranslator.FromHtml("red").ToHexString() } + Color = new() { Val = HtmlColor.Parse("red").ToHexString() } } }); }; diff --git a/test/HtmlToOpenXml.Tests/TableTests.cs b/test/HtmlToOpenXml.Tests/TableTests.cs index 57d463cc..b7e35705 100644 --- a/test/HtmlToOpenXml.Tests/TableTests.cs +++ b/test/HtmlToOpenXml.Tests/TableTests.cs @@ -247,7 +247,7 @@ public void CellSpacing_ReturnsTableCellWithSpacing() } [TestCaseSource(nameof(BorderWidthCases))] - public void HtmlBorders_ShouldSucceed(string 
borderAtrribute, IEnumerable expectedBorderValue, IEnumerable expectedBorderWidth) + public void HtmlBorders_ShouldSucceed(string borderAtrribute, IEnumerable? expectedBorderValue, IEnumerable? expectedBorderWidth) { // we specify a style which doesn't handle borders converter.HtmlStyles.AddStyle(new Style { @@ -260,6 +260,12 @@ public void HtmlBorders_ShouldSucceed(string borderAtrribute, IEnumerable()); var borders = elements[0].GetFirstChild()?.TableBorders; + if (expectedBorderValue is null) + { + Assert.That(borders, Is.Null); + return; + } + Assert.That(borders, Is.Not.Null); Assert.That(borders.HasChild(), Is.True); Assert.That(new string?[] { borders.TopBorder?.Val?.InnerText, @@ -289,8 +295,8 @@ public void HtmlBorders_ShouldSucceed(string borderAtrribute, IEnumerable
@@ -341,7 +347,35 @@ public void TableCaptionAlign_ReturnsPositionedParagraph_AlignedWithTable(string Assert.That(elements, Has.Count.EqualTo(2)); var caption = (Paragraph) elements[1]; - Assert.That(caption.ParagraphProperties?.Justification?.Val?.ToString(), Is.EqualTo(expectedAlign)); + return caption.ParagraphProperties?.Justification?.Val?.ToString(); + } + + [TestCase("align='right'", ExpectedResult = "right")] + [TestCase("style='justify-self:center'", ExpectedResult = "center")] + [TestCase("style='margin-left:auto'", ExpectedResult = "right")] + [TestCase("style='margin-left:auto;margin-right:auto'", ExpectedResult = "center")] + public string? TableAlign_ReturnsTableJustification(string style) + { + var elements = converter.Parse(@$"
Some table caption
+ +
Cell 1.1
"); + + Assert.That(elements, Has.Count.EqualTo(1)); + return elements[0].GetFirstChild()?.TableJustification?.Val?.ToString(); + } + + [TestCase("", ExpectedResult = "right")] + [TestCase("justify-self:center", ExpectedResult = "center")] + public string? NestedTableAlign_ReturnsTableOrParentJustification(string tableStyle) + { + var elements = converter.Parse(@$"
+ + +
Cell 1.1
+
"); + + Assert.That(elements, Has.Count.EqualTo(1)); + return elements[0].GetFirstChild()?.TableJustification?.Val?.ToString(); } [Test] @@ -467,6 +501,7 @@ public void NestedTable_ReturnsTableInsideTable() var cell = elements[0].GetFirstChild()?.GetFirstChild(); Assert.That(cell, Is.Not.Null); Assert.That(cell.HasChild(), Is.True); + Assert.That(cell.HasChild(), Is.True, "Word requires at least a paragraph"); } [Test] @@ -627,5 +662,26 @@ public void CellBorders_ShouldNotPropagate_OnRuns() Assert.That(runs.Count(), Is.EqualTo(1)); Assert.That(runs.First().RunProperties?.Border, Is.Null); } + + [TestCase("100%", "pct", "5000")] + [TestCase("auto", "auto", "0")] + [TestCase("120px", "dxa", "1800")] + [TestCase("", "pct", "5000", Description = "Defaults to 100%")] + public void Width_ReturnsRefineTableWidth(string width, string expectedUnit, string expectedValue) + { + var elements = converter.Parse(@$"
+ +
Placeholder
"); + + Assert.That(elements, Has.Count.EqualTo(1)); + Assert.That(elements, Has.All.TypeOf()); + var tableWidth = elements[0].GetFirstChild()?.TableWidth; + Assert.That(tableWidth, Is.Not.Null); + Assert.Multiple(() => + { + Assert.That(tableWidth?.Type?.Value, Is.EqualTo(new TableWidthUnitValues(expectedUnit))); + Assert.That(tableWidth?.Width?.Value, Is.EqualTo(expectedValue)); + }); + } } } \ No newline at end of file diff --git a/test/HtmlToOpenXml.Tests/Utilities/MockHttpMessageHandler.cs b/test/HtmlToOpenXml.Tests/Utilities/MockHttpMessageHandler.cs new file mode 100644 index 00000000..af258be8 --- /dev/null +++ b/test/HtmlToOpenXml.Tests/Utilities/MockHttpMessageHandler.cs @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2017 Deal Stream sàrl. All rights reserved + */ +using System.Net.Http; + +namespace HtmlToOpenXml.Tests +{ + public class MockHttpMessageHandler : HttpMessageHandler + { + private readonly Func> _getResponseFunc; + + public MockHttpMessageHandler(Func> getResponseFunc) + { + _getResponseFunc = getResponseFunc; + } + + protected override async Task SendAsync(HttpRequestMessage request, CancellationToken cancellationToken) + { + return await _getResponseFunc(request.RequestUri); + } + } +} \ No newline at end of file diff --git a/test/HtmlToOpenXml.Tests/WhitespaceTests.cs b/test/HtmlToOpenXml.Tests/WhitespaceTests.cs index 4a33b15c..7389c59f 100644 --- a/test/HtmlToOpenXml.Tests/WhitespaceTests.cs +++ b/test/HtmlToOpenXml.Tests/WhitespaceTests.cs @@ -14,7 +14,7 @@ public class WhitespaceTests : HtmlConverterTestBase public void ConsecutivePhrasing_ReturnsOneParagraphWithMulitpleRuns () { // the new line should generate a space between "bold" and "text" - var elements = converter.Parse("This is a bold\ntext"); + var elements = converter.Parse("This is a bold\ntext"); Assert.That(elements, Has.Count.EqualTo(1)); Assert.That(elements, Has.All.TypeOf()); Assert.That(elements[0].ChildElements, Is.All.TypeOf()); @@ -32,15 +32,21 @@ public void 
ConsecutiveDivs_ReturnsMultipleParagraphs () Assert.That(elements[1].InnerText, Is.EqualTo("World")); } + [TestCase("

Hello \n World!

")] + [TestCase("

Hello World!

")] + [TestCase("Hello World!")] [TestCase("

Hello\r\n World!

")] [TestCase(" Hello \r\n World! ")] [TestCase(" Hello\r\n\r\nWorld! ")] + [TestCase("
Hello World!
")] + [TestCase("
\n Hello World!")] + [TestCase("

Hello World!

")] public void Multiline_ReturnsCollapsedText (string html) { var elements = converter.Parse(html); - Assert.That(elements, Has.Count.EqualTo(1)); + Assert.That(elements, Has.Count.GreaterThanOrEqualTo(1)); Assert.That(elements, Has.All.TypeOf()); - Assert.That(elements[0].InnerText, Is.EqualTo("Hello World!")); + Assert.That(elements.Last().InnerText, Is.EqualTo("Hello World!")); } [TestCase("h1")] @@ -97,5 +103,36 @@ public void ConsecutivePhrasingWithBreak_ReturnsSecondLineWithNoSpaces() }); Assert.That(((Text)runs.ElementAt(2).FirstChild).Text, Is.EqualTo("World")); } + + [Test] + public void NoTextPhrasing_ShouldBeIgnored() + { + var elements = converter.Parse("

Texte
"); + Assert.That(elements, Has.Count.GreaterThanOrEqualTo(2)); + Assert.Multiple(() => + { + Assert.That(elements[0].GetFirstChild()?.HasChild(), Is.False, "Standalone line break is replaced with an empty paragraph"); + Assert.That(elements.Last().InnerText, Is.EqualTo("Texte")); + Assert.That(elements.Select(e => e.Elements().Count()), Has.All.EqualTo(1)); + }); + } + + [Test] + public void ConsecutiveSpans_WithNoSpace_ReturnsOneParagraphWithNoSpace () + { + var elements = converter.Parse("HelloWorld"); + Assert.That(elements, Has.Count.EqualTo(1)); + Assert.That(elements, Is.All.TypeOf()); + Assert.That(elements[0].InnerText, Is.EqualTo("HelloWorld")); + } + + [Test] + public void ConsecutiveSpans_WithSpaces_ReturnsOneParagraphWithNoSpace () + { + var elements = converter.Parse("Hello World"); + Assert.That(elements, Has.Count.EqualTo(1)); + Assert.That(elements, Is.All.TypeOf()); + Assert.That(elements[0].InnerText, Is.EqualTo("Hello World")); + } } } \ No newline at end of file