-
Notifications
You must be signed in to change notification settings - Fork 3.2k
/
Copy pathHtmlToXamlConverter.cs
2764 lines (2481 loc) · 124 KB
/
HtmlToXamlConverter.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// // Copyright (c) Microsoft. All rights reserved.
// // Licensed under the MIT license. See LICENSE file in the project root for full license information.
using System;
using System.Collections;
using System.Collections.Generic;
using System.ComponentModel;
using System.Diagnostics;
using System.Globalization;
using System.Windows;
using System.Windows.Documents;
using System.Xml;
namespace HtmlToXamlDemo
{
/// <summary>
/// HtmlToXamlConverter is a static class that takes an HTML string
/// and converts it into XAML
/// </summary>
public static class HtmlToXamlConverter
{
// ----------------------------------------------------------------
//
// Internal Constants
//
// ----------------------------------------------------------------
// Names of the XAML elements, attributes and attribute values used in a conversion.
// Root, inline and hyperlink names (NavigateUri and TargetName are set as attributes of Hyperlink)
public const string XamlFlowDocument = "FlowDocument";
public const string XamlRun = "Run";
public const string XamlSpan = "Span";
public const string XamlHyperlink = "Hyperlink";
public const string XamlHyperlinkNavigateUri = "NavigateUri";
public const string XamlHyperlinkTargetName = "TargetName";
public const string XamlSection = "Section";
// List element, its MarkerStyle attribute and the marker style values
public const string XamlList = "List";
public const string XamlListMarkerStyle = "MarkerStyle";
public const string XamlListMarkerStyleNone = "None";
public const string XamlListMarkerStyleDecimal = "Decimal";
public const string XamlListMarkerStyleDisc = "Disc";
public const string XamlListMarkerStyleCircle = "Circle";
public const string XamlListMarkerStyleSquare = "Square";
public const string XamlListMarkerStyleBox = "Box";
public const string XamlListMarkerStyleLowerLatin = "LowerLatin";
public const string XamlListMarkerStyleUpperLatin = "UpperLatin";
public const string XamlListMarkerStyleLowerRoman = "LowerRoman";
public const string XamlListMarkerStyleUpperRoman = "UpperRoman";
public const string XamlListItem = "ListItem";
// Block-level names and box properties
public const string XamlLineBreak = "LineBreak";
public const string XamlParagraph = "Paragraph";
public const string XamlMargin = "Margin";
public const string XamlPadding = "Padding";
public const string XamlBorderBrush = "BorderBrush";
public const string XamlBorderThickness = "BorderThickness";
// Table names
public const string XamlTable = "Table";
// A FlowDocument table requires this element; the name carries the "Table." prefix
// because XmlReader cannot resolve the namespace of this element
public const string XamlTableColumnGroup = "Table.Columns";
public const string XamlTableColumn = "TableColumn";
public const string XamlTableRowGroup = "TableRowGroup";
public const string XamlTableRow = "TableRow";
public const string XamlTableCell = "TableCell";
public const string XamlTableCellBorderThickness = "BorderThickness";
public const string XamlTableCellBorderBrush = "BorderBrush";
public const string XamlTableCellColumnSpan = "ColumnSpan";
public const string XamlTableCellRowSpan = "RowSpan";
public const string XamlWidth = "Width";
// Brush, font and text formatting names
public const string XamlBrushesBlack = "Black";
public const string XamlFontFamily = "FontFamily";
public const string XamlFontSize = "FontSize";
// Point sizes standing in for the named font sizes given in the trailing comments
public const string XamlFontSizeXxLarge = "22pt"; // "XXLarge";
public const string XamlFontSizeXLarge = "20pt"; // "XLarge";
public const string XamlFontSizeLarge = "18pt"; // "Large";
public const string XamlFontSizeMedium = "16pt"; // "Medium";
public const string XamlFontSizeSmall = "12pt"; // "Small";
public const string XamlFontSizeXSmall = "10pt"; // "XSmall";
public const string XamlFontSizeXxSmall = "8pt"; // "XXSmall";
public const string XamlFontWeight = "FontWeight";
public const string XamlFontWeightBold = "Bold";
public const string XamlFontStyle = "FontStyle";
public const string XamlForeground = "Foreground";
public const string XamlBackground = "Background";
public const string XamlTextDecorations = "TextDecorations";
public const string XamlTextDecorationsUnderline = "Underline";
public const string XamlTextIndent = "TextIndent";
public const string XamlTextAlignment = "TextAlignment";
// ---------------------------------------------------------------------
//
// Private Fields
//
// ---------------------------------------------------------------------
#region Private Fields
// XML namespace URI passed to CreateElement when the converter generates XAML elements.
private static readonly string XamlNamespace = "http://schemas.microsoft.com/winfx/2006/xaml/presentation";
#endregion Private Fields
// ---------------------------------------------------------------------
//
// Internal Methods
//
// ---------------------------------------------------------------------
#region Internal Methods
/// <summary>
/// Translates an html string into the equivalent xaml markup.
/// </summary>
/// <param name="htmlString">
/// The html to convert; it may be badly formatted xml.
/// </param>
/// <param name="asFlowDocument">
/// true to produce a FlowDocument root element;
/// false to produce a Section root, or a Span when the StartFragment/EndFragment
/// comments mark an inline fragment.
/// </param>
/// <returns>
/// The outer xml of the generated xaml root element, marked with xml:space="preserve".
/// </returns>
public static string ConvertHtmlToXaml(string htmlString, bool asFlowDocument)
{
    // Turn the html text into a well-formed xml tree
    var parsedHtml = HtmlParser.ParseHtml(htmlString);

    // The root is a FlowDocument or a Section, created in a fresh xaml document
    var xamlRoot = new XmlDocument().CreateElement(
        null, asFlowDocument ? XamlFlowDocument : XamlSection, XamlNamespace);

    // Gather the style rules of every STYLE element before converting
    var styles = new CssStylesheet(parsedHtml);

    // Stack of the html ancestors of the element currently being converted
    var ancestors = new List<XmlElement>(10);

    // Forget any fragment parent recorded by a previous conversion
    _inlineFragmentParentElement = null;

    // Convert the whole tree starting from the root html element
    AddBlock(xamlRoot, parsedHtml, new Hashtable(), styles, ancestors);

    if (!asFlowDocument)
    {
        // An inline selection is pulled out into its own Span wrapper
        xamlRoot = ExtractInlineFragment(xamlRoot);
    }

    // Serialize the result
    xamlRoot.SetAttribute("xml:space", "preserve");
    return xamlRoot.OuterXml;
}
/// <summary>
/// Returns the value of an attribute looked up by name, ignoring letter case.
/// </summary>
/// <param name="element">
/// XmlElement in which we are trying to find the specified attribute
/// </param>
/// <param name="attributeName">
/// String representing the attribute name to be searched for
/// </param>
/// <returns>
/// The value of the first attribute whose name matches, or null when no attribute matches.
/// </returns>
public static string GetAttribute(XmlElement element, string attributeName)
{
    foreach (XmlAttribute attribute in element.Attributes)
    {
        // Ordinal comparison keeps the lookup independent of the current culture;
        // lowercasing with ToLower() maps "I" to a dotless "ı" under tr-TR, so "ID" would not match "id".
        if (string.Equals(attribute.Name, attributeName, StringComparison.OrdinalIgnoreCase))
        {
            return attribute.Value;
        }
    }
    return null;
}
/// <summary>
/// Strips one pair of matching quotation marks (double or single) from a string.
/// </summary>
/// <param name="value">
/// String representing value possibly enclosed in quotation marks
/// </param>
/// <returns>
/// The trimmed text between the quotes when the value starts and ends with the same
/// quote character; otherwise the value unchanged (including null).
/// </returns>
internal static string UnQuote(string value)
{
    // A lone quote character both starts and ends the string, so at least two
    // characters are required before there is a pair to strip.
    if (value == null || value.Length < 2)
    {
        return value;
    }

    var first = value[0];
    var last = value[value.Length - 1];
    if ((first == '"' || first == '\'') && first == last)
    {
        value = value.Substring(1, value.Length - 2).Trim();
    }
    return value;
}
#endregion Internal Methods
// ---------------------------------------------------------------------
//
// Private Methods
//
// ---------------------------------------------------------------------
#region Private Methods
/// <summary>
/// Converts one html node found in block context and adds the result to the
/// children collection of xamlParentElement.
/// Comments are only inspected for fragment markers, text nodes start an implicit
/// paragraph, and elements are dispatched on their lowercased local name.
/// In some cases several following siblings of the given node
/// will be consumed too (e.g. LIs encountered without wrapping UL/OL,
/// which must be collected together and wrapped into one implicit List element).
/// </summary>
/// <param name="xamlParentElement">
/// Parent xaml element, to which new converted element will be added
/// </param>
/// <param name="htmlNode">
/// Source html node subject to convert to xaml.
/// </param>
/// <param name="inheritedProperties">
/// Properties inherited from an outer context.
/// </param>
/// <param name="stylesheet">
/// Stylesheet handed through to the element converters.
/// </param>
/// <param name="sourceContext">
/// Stack of html ancestors; the element is pushed while it is being converted and popped afterwards.
/// </param>
/// <returns>
/// Last processed html node. Normally it is the same node as was passed in,
/// but in some irregular cases it is one of its following siblings.
/// The caller must use this node to get to the next sibling from it.
/// The value comes from the helper that handled the node, so callers should
/// null-check it before reading NextSibling.
/// </returns>
private static XmlNode AddBlock(XmlElement xamlParentElement, XmlNode htmlNode, Hashtable inheritedProperties,
    CssStylesheet stylesheet, List<XmlElement> sourceContext)
{
    if (htmlNode is XmlComment)
    {
        DefineInlineFragmentParent((XmlComment) htmlNode, /*xamlParentElement:*/null);
    }
    else if (htmlNode is XmlText)
    {
        htmlNode = AddImplicitParagraph(xamlParentElement, htmlNode, inheritedProperties, stylesheet,
            sourceContext);
    }
    else if (htmlNode is XmlElement)
    {
        var htmlElement = (XmlElement) htmlNode;
        if (htmlElement.NamespaceURI != HtmlParser.XhtmlNamespace)
        {
            // Non-html element. skip it
            // Isn't it too aggressive? What if this is just an error in html tag name?
            // TODO: Consider skipping just a wrapper in recursing into the element tree,
            // which may produce some garbage though coming from xml fragments.
            return htmlElement;
        }

        // Put source element to the stack
        sourceContext.Add(htmlElement);

        // Html element names are case-insensitive. Lowercase with the invariant culture so the
        // switch below does not depend on the current culture (ToLower() under tr-TR turns
        // "DIV" into "dıv", which would match none of the cases).
        var htmlElementName = htmlElement.LocalName.ToLowerInvariant();

        // Switch to an appropriate kind of processing depending on html element name
        switch (htmlElementName)
        {
            // Sections:
            case "html":
            case "body":
            case "div":
            case "form": // not a block according to xhtml spec
            case "pre": // Renders text in a fixed-width font
            case "blockquote":
            case "caption":
            case "center":
            case "cite":
                AddSection(xamlParentElement, htmlElement, inheritedProperties, stylesheet, sourceContext);
                break;

            // Paragraphs:
            case "p":
            case "h1":
            case "h2":
            case "h3":
            case "h4":
            case "h5":
            case "h6":
            case "nsrtitle":
            case "textarea":
            case "dd": // ???
            case "dl": // ???
            case "dt": // ???
            case "tt": // ???
                AddParagraph(xamlParentElement, htmlElement, inheritedProperties, stylesheet, sourceContext);
                break;

            case "ol":
            case "ul":
            case "dir": // treat as UL element
            case "menu": // treat as UL element
                // List element conversion
                AddList(xamlParentElement, htmlElement, inheritedProperties, stylesheet, sourceContext);
                break;

            case "li":
                // LI outside of OL/UL
                // Collect all sibling LIs, wrap them into a List and then proceed with the element following the last of LIs
                htmlNode = AddOrphanListItems(xamlParentElement, htmlElement, inheritedProperties, stylesheet,
                    sourceContext);
                break;

            case "img":
                // TODO: Add image processing
                AddImage(xamlParentElement, htmlElement, inheritedProperties, stylesheet, sourceContext);
                break;

            case "table":
                // hand off to table parsing function which will perform special table syntax checks
                AddTable(xamlParentElement, htmlElement, inheritedProperties, stylesheet, sourceContext);
                break;

            case "style": // We already pre-processed all style elements. Ignore it now
            case "meta":
            case "head":
            case "title":
            case "script":
                // Ignore these elements
                break;

            // Table stuff without table wrapper
            // TODO: add special-case processing here for elements that should be within tables when the
            // parent element is NOT a table. If the parent element is a table they can be processed normally.
            // Until then these names are handled exactly like any unknown element.
            case "tbody":
            case "tfoot":
            case "thead":
            case "tr":
            case "td":
            case "th":
            default:
                // Wrap a sequence of inlines into an implicit paragraph
                htmlNode = AddImplicitParagraph(xamlParentElement, htmlElement, inheritedProperties, stylesheet,
                    sourceContext);
                break;
        }

        // Remove the element from the stack
        Debug.Assert(sourceContext.Count > 0 && sourceContext[sourceContext.Count - 1] == htmlElement);
        sourceContext.RemoveAt(sourceContext.Count - 1);
    }

    // Return last processed node
    return htmlNode;
}
// .............................................................
//
// Line Breaks
//
// .............................................................
/// <summary>
/// Appends a LineBreak to the xaml parent. For an "hr" element the break is followed
/// by a row of dashes and a second LineBreak, imitating a horizontal rule.
/// </summary>
/// <param name="xamlParentElement">
/// Xaml element receiving the line break(s)
/// </param>
/// <param name="htmlElementName">
/// Lowercase name of the html element being converted; only "hr" gets the extra rule
/// </param>
private static void AddBreak(XmlElement xamlParentElement, string htmlElementName)
{
    var ownerDocument = xamlParentElement.OwnerDocument;

    // Every break element contributes at least one LineBreak
    xamlParentElement.AppendChild(ownerDocument.CreateElement(null, XamlLineBreak, XamlNamespace));

    if (htmlElementName != "hr")
    {
        return;
    }

    // Horizontal rule: a line of dashes terminated by another LineBreak
    xamlParentElement.AppendChild(ownerDocument.CreateTextNode("----------------------"));
    xamlParentElement.AppendChild(ownerDocument.CreateElement(null, XamlLineBreak, XamlNamespace));
}
// .............................................................
//
// Text Flow Elements
//
// .............................................................
/// <summary>
/// Generates Section or Paragraph element from DIV depending whether it contains any block elements or not
/// </summary>
/// <param name="xamlParentElement">
/// XmlElement representing Xaml parent to which the converted element should be added
/// </param>
/// <param name="htmlElement">
/// XmlElement representing Html element to be converted
/// </param>
/// <param name="inheritedProperties">
/// properties inherited from parent context
/// </param>
/// <param name="stylesheet">
/// Stylesheet passed to GetElementProperties and to the nested converters
/// </param>
/// <param name="sourceContext">
/// Stack of html ancestors passed to GetElementProperties and to the nested converters
/// </param>
private static void AddSection(XmlElement xamlParentElement, XmlElement htmlElement,
    Hashtable inheritedProperties,
    CssStylesheet stylesheet, List<XmlElement> sourceContext)
{
    // Analyze the content of htmlElement to decide what xaml element to choose - Section or Paragraph.
    // If this Div has at least one block child then we need to use Section, otherwise use Paragraph
    var htmlElementContainsBlocks = false;
    for (var htmlChildNode = htmlElement.FirstChild;
        htmlChildNode != null;
        htmlChildNode = htmlChildNode.NextSibling)
    {
        var htmlChildElement = htmlChildNode as XmlElement;
        // Invariant lowercasing: tag names must not be folded with culture rules
        // (ToLower() under tr-TR turns "DIV" into "dıv").
        if (htmlChildElement != null &&
            HtmlSchema.IsBlockElement(htmlChildElement.LocalName.ToLowerInvariant()))
        {
            htmlElementContainsBlocks = true;
            break;
        }
    }

    if (!htmlElementContainsBlocks)
    {
        // The Div does not contain any block elements, so we can treat it as a Paragraph
        AddParagraph(xamlParentElement, htmlElement, inheritedProperties, stylesheet, sourceContext);
        return;
    }

    // The Div has some nested blocks, so we treat it as a Section
    // Create currentProperties as a compilation of local and inheritedProperties, set localProperties
    Hashtable localProperties;
    var currentProperties = GetElementProperties(htmlElement, inheritedProperties, out localProperties,
        stylesheet,
        sourceContext);

    // Create a XAML element corresponding to this html element
    var xamlElement = xamlParentElement.OwnerDocument.CreateElement( /*prefix:*/
        null, /*localName:*/XamlSection, XamlNamespace);
    ApplyLocalProperties(xamlElement, localProperties, /*isBlock:*/true);

    // Decide whether we can unwrap this element as not having any formatting significance.
    if (!xamlElement.HasAttributes)
    {
        // This element is a group of block elements without any additional formatting.
        // We can add blocks directly to xamlParentElement and avoid
        // creating unnecessary Sections nesting.
        xamlElement = xamlParentElement;
    }

    // Recurse into element subtree. AddBlock returns the last node it consumed, which may be
    // a later sibling or null, hence the null-conditional step.
    for (var htmlChildNode = htmlElement.FirstChild;
        htmlChildNode != null;
        htmlChildNode = htmlChildNode?.NextSibling)
    {
        htmlChildNode = AddBlock(xamlElement, htmlChildNode, currentProperties, stylesheet, sourceContext);
    }

    // Add the new element to the parent.
    if (xamlElement != xamlParentElement)
    {
        xamlParentElement.AppendChild(xamlElement);
    }
}
/// <summary>
/// Converts an html element into a xaml Paragraph: the element's own properties are
/// applied to the Paragraph and each child node is converted as an inline.
/// </summary>
/// <param name="xamlParentElement">
/// Xaml parent that receives the new Paragraph
/// </param>
/// <param name="htmlElement">
/// Html element to be converted
/// </param>
/// <param name="inheritedProperties">
/// Properties coming from the parent context
/// </param>
/// <param name="stylesheet">
/// Stylesheet passed to GetElementProperties and to AddInline
/// </param>
/// <param name="sourceContext">
/// Stack of html ancestors passed to GetElementProperties and to AddInline
/// </param>
private static void AddParagraph(XmlElement xamlParentElement, XmlElement htmlElement,
    Hashtable inheritedProperties,
    CssStylesheet stylesheet, List<XmlElement> sourceContext)
{
    // Resolve the properties: the merged set is handed down, the local set goes on the Paragraph
    Hashtable ownProperties;
    var effectiveProperties = GetElementProperties(
        htmlElement, inheritedProperties, out ownProperties, stylesheet, sourceContext);

    var xamlParagraph = xamlParentElement.OwnerDocument.CreateElement(null, XamlParagraph, XamlNamespace);
    ApplyLocalProperties(xamlParagraph, ownProperties, true);

    // Every child of the html element becomes inline content of the Paragraph
    var htmlChild = htmlElement.FirstChild;
    while (htmlChild != null)
    {
        AddInline(xamlParagraph, htmlChild, effectiveProperties, stylesheet, sourceContext);
        htmlChild = htmlChild.NextSibling;
    }

    // The Paragraph is attached to its parent only after it has been filled
    xamlParentElement.AppendChild(xamlParagraph);
}
/// <summary>
/// Creates a Paragraph element and adds all nodes starting from htmlNode
/// converted to appropriate Inlines. Collection stops at the first block element
/// or when the siblings run out.
/// </summary>
/// <param name="xamlParentElement">
/// XmlElement representing Xaml parent to which the converted element should be added
/// </param>
/// <param name="htmlNode">
/// XmlNode starting a collection of implicitly wrapped inlines.
/// </param>
/// <param name="inheritedProperties">
/// properties inherited from parent context
/// </param>
/// <param name="stylesheet">
/// Stylesheet passed through to AddInline
/// </param>
/// <param name="sourceContext">
/// Stack of html ancestors passed through to AddInline
/// </param>
/// <returns>
/// The last htmlNode added to the implicit paragraph. When the very first node is a
/// block element nothing is consumed and the starting node itself is returned, so that
/// the caller skips it and continues with its next sibling. Null only when htmlNode is null.
/// </returns>
private static XmlNode AddImplicitParagraph(XmlElement xamlParentElement, XmlNode htmlNode,
    Hashtable inheritedProperties, CssStylesheet stylesheet, List<XmlElement> sourceContext)
{
    // Collect all non-block elements and wrap them into implicit Paragraph
    var xamlParagraph = xamlParentElement.OwnerDocument.CreateElement( /*prefix:*/
        null, /*localName:*/XamlParagraph, XamlNamespace);

    // Start from the incoming node rather than null: if the loop stops on its first
    // iteration, returning null would make callers that step with "?.NextSibling"
    // abandon every remaining sibling.
    var lastNodeProcessed = htmlNode;

    while (htmlNode != null)
    {
        if (htmlNode is XmlComment)
        {
            DefineInlineFragmentParent((XmlComment) htmlNode, /*xamlParentElement:*/null);
        }
        else if (htmlNode is XmlText)
        {
            // Whitespace-only text does not start or extend an implicit paragraph
            if (htmlNode.Value.Trim().Length > 0)
            {
                AddTextRun(xamlParagraph, htmlNode.Value);
            }
        }
        else if (htmlNode is XmlElement)
        {
            // Invariant lowercasing keeps tag-name classification independent of the current culture
            var htmlChildName = ((XmlElement) htmlNode).LocalName.ToLowerInvariant();
            if (HtmlSchema.IsBlockElement(htmlChildName))
            {
                // The sequence of non-blocked inlines ended. Stop implicit loop here.
                break;
            }
            AddInline(xamlParagraph, (XmlElement) htmlNode, inheritedProperties, stylesheet, sourceContext);
        }

        // Store last processed node to return it at the end
        lastNodeProcessed = htmlNode;
        htmlNode = htmlNode.NextSibling;
    }

    // Add the Paragraph to the parent
    // If only whitespaces and comments have been encountered,
    // then we have nothing to add in implicit paragraph; forget it.
    if (xamlParagraph.FirstChild != null)
    {
        xamlParentElement.AppendChild(xamlParagraph);
    }

    // Need to return last processed node
    return lastNodeProcessed;
}
// .............................................................
//
// Inline Elements
//
// .............................................................
/// <summary>
/// Converts one html node found in inline context and adds the result to xamlParentElement.
/// Comments are checked for fragment markers, text becomes a text run, and elements are
/// dispatched on their lowercased name; elements outside the xhtml namespace and elements
/// that are neither inline nor block are ignored.
/// </summary>
/// <param name="xamlParentElement">
/// Xaml element receiving the converted inline content
/// </param>
/// <param name="htmlNode">
/// Html node to convert
/// </param>
/// <param name="inheritedProperties">
/// Properties inherited from the parent context
/// </param>
/// <param name="stylesheet">
/// Stylesheet passed through to the element converters
/// </param>
/// <param name="sourceContext">
/// Stack of html ancestors; the element is pushed while it is being converted and popped afterwards
/// </param>
private static void AddInline(XmlElement xamlParentElement, XmlNode htmlNode, Hashtable inheritedProperties,
    CssStylesheet stylesheet, List<XmlElement> sourceContext)
{
    if (htmlNode is XmlComment)
    {
        DefineInlineFragmentParent((XmlComment) htmlNode, xamlParentElement);
    }
    else if (htmlNode is XmlText)
    {
        AddTextRun(xamlParentElement, htmlNode.Value);
    }
    else if (htmlNode is XmlElement)
    {
        var htmlElement = (XmlElement) htmlNode;

        // Check whether this is an html element
        if (htmlElement.NamespaceURI != HtmlParser.XhtmlNamespace)
        {
            return; // Skip non-html elements
        }

        // Identify element name. Invariant lowercasing keeps the dispatch independent of the
        // current culture (ToLower() under tr-TR turns "IMG" into "ımg").
        var htmlElementName = htmlElement.LocalName.ToLowerInvariant();

        // Put source element to the stack
        sourceContext.Add(htmlElement);

        switch (htmlElementName)
        {
            case "a":
                AddHyperlink(xamlParentElement, htmlElement, inheritedProperties, stylesheet, sourceContext);
                break;
            case "img":
                AddImage(xamlParentElement, htmlElement, inheritedProperties, stylesheet, sourceContext);
                break;
            case "br":
            case "hr":
                AddBreak(xamlParentElement, htmlElementName);
                break;
            default:
                if (HtmlSchema.IsInlineElement(htmlElementName) || HtmlSchema.IsBlockElement(htmlElementName))
                {
                    // Note: actually we do not expect block elements here,
                    // but if it happens to be here, we will treat it as a Span.
                    AddSpanOrRun(xamlParentElement, htmlElement, inheritedProperties, stylesheet, sourceContext);
                }
                // Ignore all other elements non-(block/inline/image)
                break;
        }

        // Remove the element from the stack
        Debug.Assert(sourceContext.Count > 0 && sourceContext[sourceContext.Count - 1] == htmlElement);
        sourceContext.RemoveAt(sourceContext.Count - 1);
    }
}
/// <summary>
/// Converts an inline html element into a xaml Span or Run. A Span is used when the element
/// has at least one child element that is inline, block, img, br or hr; otherwise a Run is used.
/// </summary>
/// <param name="xamlParentElement">
/// Xaml element receiving the new Span or Run
/// </param>
/// <param name="htmlElement">
/// Html element to be converted
/// </param>
/// <param name="inheritedProperties">
/// Properties inherited from the parent context
/// </param>
/// <param name="stylesheet">
/// Stylesheet passed to GetElementProperties and to AddInline
/// </param>
/// <param name="sourceContext">
/// Stack of html ancestors passed to GetElementProperties and to AddInline
/// </param>
private static void AddSpanOrRun(XmlElement xamlParentElement, XmlElement htmlElement,
    Hashtable inheritedProperties,
    CssStylesheet stylesheet, List<XmlElement> sourceContext)
{
    // Decide what XAML element to use for this inline element.
    // Check whether it contains any nested inlines
    var elementHasChildren = false;
    for (var htmlNode = htmlElement.FirstChild; htmlNode != null; htmlNode = htmlNode.NextSibling)
    {
        var htmlChildElement = htmlNode as XmlElement;
        if (htmlChildElement == null)
        {
            continue;
        }

        // Invariant lowercasing: with ToLower() under tr-TR "IMG" becomes "ımg" and
        // the comparisons below would miss it.
        var htmlChildName = htmlChildElement.LocalName.ToLowerInvariant();
        if (HtmlSchema.IsInlineElement(htmlChildName) || HtmlSchema.IsBlockElement(htmlChildName) ||
            htmlChildName == "img" || htmlChildName == "br" || htmlChildName == "hr")
        {
            elementHasChildren = true;
            break;
        }
    }

    var xamlElementName = elementHasChildren ? XamlSpan : XamlRun;

    // Create currentProperties as a compilation of local and inheritedProperties, set localProperties
    Hashtable localProperties;
    var currentProperties = GetElementProperties(htmlElement, inheritedProperties, out localProperties,
        stylesheet,
        sourceContext);

    // Create a XAML element corresponding to this html element
    var xamlElement = xamlParentElement.OwnerDocument.CreateElement( /*prefix:*/
        null, /*localName:*/xamlElementName, XamlNamespace);
    ApplyLocalProperties(xamlElement, localProperties, /*isBlock:*/false);

    // Recurse into element subtree
    for (var htmlChildNode = htmlElement.FirstChild;
        htmlChildNode != null;
        htmlChildNode = htmlChildNode.NextSibling)
    {
        AddInline(xamlElement, htmlChildNode, currentProperties, stylesheet, sourceContext);
    }

    // Add the new element to the parent.
    xamlParentElement.AppendChild(xamlElement);
}
/// <summary>
/// Adds a text run to a xaml tree: control characters are dropped, no-break spaces are
/// replaced by ordinary spaces, and the remaining text (if any) is appended as a text node.
/// </summary>
/// <param name="xamlElement">
/// Xaml element receiving the text node
/// </param>
/// <param name="textData">
/// Raw text taken from the html source
/// </param>
private static void AddTextRun(XmlElement xamlElement, string textData)
{
    // Build the cleaned text in a single pass; removing characters from the string one at a
    // time re-copies the whole string for every control character.
    var cleanedText = new System.Text.StringBuilder(textData.Length);
    foreach (var character in textData)
    {
        // Remove control characters
        if (char.IsControl(character))
        {
            continue;
        }

        // Replace No-Breaks by spaces (160 is the code of the &nbsp; entity in html)
        // This is a work around since WPF/XAML does not support &nbsp;.
        cleanedText.Append(character == (char) 160 ? ' ' : character);
    }

    if (cleanedText.Length > 0)
    {
        xamlElement.AppendChild(xamlElement.OwnerDocument.CreateTextNode(cleanedText.ToString()));
    }
}
/// <summary>
/// Converts an html A element. Without an href attribute it is converted like any other
/// inline element; otherwise a xaml Hyperlink is produced whose NavigateUri is the href
/// part before '#' and whose TargetName is the part after it.
/// </summary>
/// <param name="xamlParentElement">
/// Xaml element receiving the converted link
/// </param>
/// <param name="htmlElement">
/// Html A element to be converted
/// </param>
/// <param name="inheritedProperties">
/// Properties inherited from the parent context
/// </param>
/// <param name="stylesheet">
/// Stylesheet passed to GetElementProperties and to the nested converters
/// </param>
/// <param name="sourceContext">
/// Stack of html ancestors passed to GetElementProperties and to the nested converters
/// </param>
private static void AddHyperlink(XmlElement xamlParentElement, XmlElement htmlElement,
    Hashtable inheritedProperties,
    CssStylesheet stylesheet, List<XmlElement> sourceContext)
{
    var href = GetAttribute(htmlElement, "href");
    if (href == null)
    {
        // No target to navigate to: the anchor is just formatted inline content
        AddSpanOrRun(xamlParentElement, htmlElement, inheritedProperties, stylesheet, sourceContext);
        return;
    }

    // Resolve the properties: the merged set is handed down, the local set goes on the Hyperlink
    Hashtable ownProperties;
    var effectiveProperties = GetElementProperties(
        htmlElement, inheritedProperties, out ownProperties, stylesheet, sourceContext);

    var xamlHyperlink = xamlParentElement.OwnerDocument.CreateElement(null, XamlHyperlink, XamlNamespace);
    ApplyLocalProperties(xamlHyperlink, ownProperties, false);

    // "address#target": the address becomes NavigateUri, the target becomes TargetName.
    // Split always yields at least one part, so the first part can be read directly.
    var hrefParts = href.Split('#');
    var address = hrefParts[0].Trim();
    if (address.Length > 0)
    {
        xamlHyperlink.SetAttribute(XamlHyperlinkNavigateUri, address);
    }
    if (hrefParts.Length == 2)
    {
        var target = hrefParts[1].Trim();
        if (target.Length > 0)
        {
            xamlHyperlink.SetAttribute(XamlHyperlinkTargetName, target);
        }
    }

    // The link content is converted as inlines of the Hyperlink
    var htmlChild = htmlElement.FirstChild;
    while (htmlChild != null)
    {
        AddInline(xamlHyperlink, htmlChild, effectiveProperties, stylesheet, sourceContext);
        htmlChild = htmlChild.NextSibling;
    }

    xamlParentElement.AppendChild(xamlHyperlink);
}
// Stores a parent xaml element for the case when selected fragment is inline.
// Reset to null at the start of ConvertHtmlToXaml, assigned by DefineInlineFragmentParent
// and read by ExtractInlineFragment. It is static state shared by every conversion.
private static XmlElement _inlineFragmentParentElement;
/// <summary>
/// Called for every html comment to remember the xaml parent of an inline fragment,
/// so that the fragment can be extracted into a separate Span wrapper after the conversion.
/// </summary>
/// <param name="htmlComment">
/// Html comment to inspect; only "StartFragment" and "EndFragment" have an effect
/// </param>
/// <param name="xamlParentElement">
/// Xaml element containing the comment, or null when the comment is in block context
/// </param>
private static void DefineInlineFragmentParent(XmlComment htmlComment, XmlElement xamlParentElement)
{
    switch (htmlComment.Value)
    {
        case "StartFragment":
            // The start marker always decides the parent (possibly resetting it to null)
            _inlineFragmentParentElement = xamlParentElement;
            break;

        case "EndFragment":
            // Markers written by Word or IE are consistent, but hand-written ones may not be.
            // StartFragment takes precedence; the end marker only supplies a parent
            // when the start marker did not record one.
            if (xamlParentElement != null && _inlineFragmentParentElement == null)
            {
                _inlineFragmentParentElement = xamlParentElement;
            }
            break;
    }
}
/// <summary>
/// Extracts the content of the element recorded as the inline fragment parent into a
/// separate Span wrapper. When no fragment parent was recorded the given root is returned
/// unchanged; when the recorded parent is itself a Span it is returned as is.
/// </summary>
/// <param name="xamlFlowDocumentElement">
/// Root element produced by the conversion
/// </param>
/// <returns>
/// The element to serialize as the conversion result
/// </returns>
private static XmlElement ExtractInlineFragment(XmlElement xamlFlowDocumentElement)
{
    var fragmentParent = _inlineFragmentParentElement;
    if (fragmentParent == null)
    {
        return xamlFlowDocumentElement;
    }

    if (fragmentParent.LocalName == XamlSpan)
    {
        // Already a Span: it can serve as the wrapper directly
        return fragmentParent;
    }

    // Move every child of the fragment parent into a new Span
    var spanWrapper = xamlFlowDocumentElement.OwnerDocument.CreateElement(null, XamlSpan, XamlNamespace);
    for (var child = fragmentParent.FirstChild; child != null; child = fragmentParent.FirstChild)
    {
        fragmentParent.RemoveChild(child);
        spanWrapper.AppendChild(child);
    }
    return spanWrapper;
}
// .............................................................
//
// Images
//
// .............................................................
// Placeholder for image conversion. The body is empty, so an IMG element routed here
// adds nothing to the xaml tree and none of the parameters are used yet.
private static void AddImage(XmlElement xamlParentElement, XmlElement htmlElement, Hashtable inheritedProperties,
CssStylesheet stylesheet, List<XmlElement> sourceContext)
{
// TODO: implement image conversion
}
// .............................................................
//
// Lists
//
// .............................................................
/// <summary>
/// Converts an Html ul or ol element into a Xaml List element. Children of the ul/ol element
/// that are not li elements are ignored and not added to the list. The List is appended to
/// <paramref name="xamlParentElement"/> only if at least one child node was produced for it.
/// </summary>
/// <param name="xamlParentElement">
/// XmlElement representing Xaml parent to which the converted element should be added
/// </param>
/// <param name="htmlListElement">
/// XmlElement representing Html ul/ol element to be converted
/// </param>
/// <param name="inheritedProperties">
/// Properties inherited from parent context
/// </param>
/// <param name="stylesheet">
/// Stylesheet passed on to element property resolution and to list item conversion
/// </param>
/// <param name="sourceContext">
/// Stack of Html source elements; each li is pushed before it is converted and popped afterwards
/// </param>
private static void AddList(XmlElement xamlParentElement, XmlElement htmlListElement,
    Hashtable inheritedProperties,
    CssStylesheet stylesheet, List<XmlElement> sourceContext)
{
    // Tag names are compared ordinally and case-insensitively. A culture-sensitive ToLower()
    // maps "LI" to "lı" (dotless i) under Turkish/Azeri cultures, which would silently drop
    // every list item written in upper case.
    var isOrderedList = string.Equals(htmlListElement.LocalName, "ol", StringComparison.OrdinalIgnoreCase);

    Hashtable localProperties;
    var currentProperties = GetElementProperties(htmlListElement, inheritedProperties, out localProperties,
        stylesheet, sourceContext);

    // Create Xaml List element
    var xamlListElement = xamlParentElement.OwnerDocument.CreateElement(null, XamlList, XamlNamespace);

    // Set default list markers: decimal numbering for ol, disc bullets for anything else
    xamlListElement.SetAttribute(XamlListMarkerStyle,
        isOrderedList ? XamlListMarkerStyleDecimal : XamlListMarkerStyleDisc);

    // Apply local properties to list to set marker attribute if specified
    // TODO: Should we have separate list attribute processing function?
    ApplyLocalProperties(xamlListElement, localProperties, /*isBlock:*/true);

    // Recurse into list subtree, converting li children only
    for (var htmlChildNode = htmlListElement.FirstChild;
        htmlChildNode != null;
        htmlChildNode = htmlChildNode.NextSibling)
    {
        var htmlListItemElement = htmlChildNode as XmlElement;
        if (htmlListItemElement == null ||
            !string.Equals(htmlListItemElement.LocalName, "li", StringComparison.OrdinalIgnoreCase))
        {
            continue;
        }

        sourceContext.Add(htmlListItemElement);
        AddListItem(xamlListElement, htmlListItemElement, currentProperties, stylesheet, sourceContext);

        // The conversion must leave the context stack balanced before we pop our own entry
        Debug.Assert(sourceContext.Count > 0 && sourceContext[sourceContext.Count - 1] == htmlListItemElement);
        sourceContext.RemoveAt(sourceContext.Count - 1);
    }

    // Add the List element to xaml tree - if it is not empty
    if (xamlListElement.HasChildNodes)
    {
        xamlParentElement.AppendChild(xamlListElement);
    }
}
/// <summary>
/// Handles li elements that appear in the Html without a parent ul/ol element by collecting them
/// into a Xaml List. If the node most recently added to <paramref name="xamlParentElement"/> is a
/// List element, the items are added to that list; otherwise a new List is created and appended to
/// the parent. Items are added for as long as li elements follow each other as direct siblings.
/// The first sibling that is not an li element (including any text node) stops the addition.
/// </summary>
/// <param name="xamlParentElement">
/// Parent element for the list
/// </param>
/// <param name="htmlLiElement">
/// Start Html li element without parent list
/// </param>
/// <param name="inheritedProperties">
/// Properties inherited from parent context; passed unchanged to every converted li
/// </param>
/// <param name="stylesheet">
/// Stylesheet passed on to list item conversion
/// </param>
/// <param name="sourceContext">
/// Source element context passed on to list item conversion
/// </param>
/// <returns>
/// The last Html li element that was converted, or null if none was.
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="htmlLiElement"/> is null.
/// </exception>
private static XmlElement AddOrphanListItems(XmlElement xamlParentElement, XmlElement htmlLiElement,
    Hashtable inheritedProperties, CssStylesheet stylesheet, List<XmlElement> sourceContext)
{
    if (htmlLiElement == null)
    {
        throw new ArgumentNullException(nameof(htmlLiElement));
    }

    // Tag names are compared ordinally and case-insensitively. A culture-sensitive ToLower()
    // maps "LI" to "lı" (dotless i) under Turkish/Azeri cultures and would miss upper-case tags.
    Debug.Assert(string.Equals(htmlLiElement.LocalName, "li", StringComparison.OrdinalIgnoreCase));

    // The last node attached to xamlParentElement is the previous sibling of the list we are
    // about to fill. Reuse it when it is a List element; a non-element node never qualifies.
    var xamlListElement = xamlParentElement.LastChild as XmlElement;
    if (xamlListElement == null || xamlListElement.LocalName != XamlList)
    {
        // No list element near. Create our own.
        xamlListElement = xamlParentElement.OwnerDocument.CreateElement(null, XamlList, XamlNamespace);
        xamlParentElement.AppendChild(xamlListElement);
    }

    // NOTE: no element properties are computed for the implicit list; every li in the run is
    // converted with the properties inherited from the parent context.
    XmlElement lastProcessedListItemElement = null;
    var htmlListItemElement = htmlLiElement;

    // Walk the run of sequential li siblings. The "as XmlElement" conversion ends the run at any
    // non-element sibling, so a processing instruction or entity reference that merely happens
    // to be named "li" can no longer cause an InvalidCastException.
    while (htmlListItemElement != null &&
           string.Equals(htmlListItemElement.LocalName, "li", StringComparison.OrdinalIgnoreCase))
    {
        AddListItem(xamlListElement, htmlListItemElement, inheritedProperties, stylesheet, sourceContext);
        lastProcessedListItemElement = htmlListItemElement;
        htmlListItemElement = htmlListItemElement.NextSibling as XmlElement;
    }

    return lastProcessedListItemElement;
}
/// <summary>
/// Converts htmlLIElement into Xaml ListItem element, and appends it to the parent xamlListElement
/// </summary>
/// <param name="xamlListElement">
/// XmlElement representing Xaml List element to which the converted td/th should be added
/// </param>
/// <param name="htmlLiElement">
/// XmlElement representing Html li element to be converted
/// </param>
/// <param name="inheritedProperties">
/// Properties inherited from parent context
/// </param>
private static void AddListItem(XmlElement xamlListElement, XmlElement htmlLiElement,
Hashtable inheritedProperties,
CssStylesheet stylesheet, List<XmlElement> sourceContext)
{
// Parameter validation
Debug.Assert(xamlListElement != null);
Debug.Assert(xamlListElement.LocalName == XamlList);
Debug.Assert(htmlLiElement != null);
Debug.Assert(htmlLiElement.LocalName.ToLower() == "li");
Debug.Assert(inheritedProperties != null);
Hashtable localProperties;
var currentProperties = GetElementProperties(htmlLiElement, inheritedProperties, out localProperties,
stylesheet, sourceContext);
var xamlListItemElement = xamlListElement.OwnerDocument.CreateElement(null, XamlListItem,
XamlNamespace);
// TODO: process local properties for li element
// Process children of the ListItem
for (var htmlChildNode = htmlLiElement.FirstChild;
htmlChildNode != null;
htmlChildNode = htmlChildNode?.NextSibling)
{
htmlChildNode = AddBlock(xamlListItemElement, htmlChildNode, currentProperties, stylesheet,
sourceContext);
}