Skip to content

Commit

Permalink
[Java.Interop.Tools.JavaSource] Fix up common HTML tags
Browse files Browse the repository at this point in the history
Fixes: #1071

The latest API docs update contained a couple dozen parsing issues due
to broken `<code></code>` elements, reserved inline characters in
`<code>` elements, and other issues.  These issues have been fixed by
no longer attempting to parse `<code>` elements with Irony.  Instead, an
HTML processing step has been added which replaces, removes, or decodes
well known HTML tags after the javadoc is parsed.

Parsing for `<a/>` elements has also been updated to fix all 83 cases
where `href` attribute parsing would fail.  Now when we encounter an
`<a/>` element that points to code or a local path we will only include
the element value in the javadoc, and not the full `href` attribute.

Readability of our generated docs should be improved by both of these
changes, as there will be fewer encoded character entities in places
where they are not necessary.
  • Loading branch information
pjcollins committed Jun 13, 2023
1 parent 32b920f commit d442c15
Show file tree
Hide file tree
Showing 5 changed files with 122 additions and 70 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ namespace Java.Interop.Tools.JavaSource {
public partial class SourceJavadocToXmldocGrammar {

public class HtmlBnfTerms {

internal HtmlBnfTerms ()
{
}
Expand All @@ -25,7 +26,6 @@ internal void CreateRules (SourceJavadocToXmldocGrammar grammar)
AllHtmlTerms.Rule = TopLevelInlineDeclaration
| PBlockDeclaration
| PreBlockDeclaration
| IgnorableElementDeclaration
;

var inlineDeclaration = new NonTerminal ("<html inline decl>", ConcatChildNodes) {
Expand All @@ -37,7 +37,6 @@ internal void CreateRules (SourceJavadocToXmldocGrammar grammar)
| FormCtrlDeclaration
*/
| InlineHyperLinkDeclaration
| CodeElementDeclaration
| grammar.InlineTagsTerms.AllInlineTerms
| UnknownHtmlElementStart
,
Expand Down Expand Up @@ -100,48 +99,53 @@ internal void CreateRules (SourceJavadocToXmldocGrammar grammar)
parseNode.AstNode = p;
};

InlineHyperLinkDeclaration.Rule = InlineHyperLinkOpenTerm + InlineDeclarations + CreateEndElement ("a", grammar, optional: true);
InlineHyperLinkDeclaration.Rule = HtmlAElementStart + InlineDeclarations + CreateEndElement ("a", grammar, optional: true);
InlineHyperLinkDeclaration.AstConfig.NodeCreator = (context, parseNode) => {
var unparsedAElementValue = string.Empty;
foreach (var cn in parseNode.ChildNodes) {
if (cn.ChildNodes?.Count > 1) {
foreach (var gcn in cn.ChildNodes) {
unparsedAElementValue += gcn.AstNode?.ToString ();
}
} else {
unparsedAElementValue += cn.AstNode?.ToString ();
}
}
var nodesAsString = GetChildNodesAsString (parseNode);
var tokenValue = parseNode.ChildNodes [0].Token.Text;
int stopIndex = nodesAsString.IndexOf ('>');
var seeElement = TryParseHRef (unparsedAElementValue);
if (seeElement == null)
seeElement = TryParseHRef (WebUtility.HtmlDecode (unparsedAElementValue), logError: true);
if (stopIndex == -1 || !tokenValue.Contains ("href", StringComparison.OrdinalIgnoreCase)) {
parseNode.AstNode = new XText (nodesAsString);
return;
}
var hrefValue = seeElement?.Attribute ("href")?.Value ?? string.Empty;
if (!string.IsNullOrEmpty (hrefValue) &&
(hrefValue.StartsWith ("http", StringComparison.OrdinalIgnoreCase) || hrefValue.StartsWith ("www", StringComparison.OrdinalIgnoreCase))) {
parseNode.AstNode = seeElement;
var attributeName = parseNode.ChildNodes [0].Term.Name;
var attributeValue = nodesAsString.Substring (0, stopIndex).Trim ().Trim('\'', '"');
var elementValue = nodesAsString.Substring (stopIndex + 1);
if (!string.IsNullOrEmpty (attributeValue) &&
(attributeValue.StartsWith ("http", StringComparison.OrdinalIgnoreCase) || attributeValue.StartsWith ("www", StringComparison.OrdinalIgnoreCase))) {
var unparsed = $"<see href=\"{attributeValue}\">{elementValue}</see>";
XNode? seeElement = TryParseElement (unparsed);
if (seeElement == null) {
// Try to parse with HTML entities decoded
seeElement = TryParseElement (WebUtility.HtmlDecode (unparsed));
if (seeElement == null) {
// Finally, try to parse with only the element value encoded
seeElement = TryParseElement ($"<see href=\"{attributeValue}\">{WebUtility.HtmlEncode (elementValue)}</see>", logError: true);
}
}
parseNode.AstNode = seeElement ?? new XText (nodesAsString);
} else {
// TODO: Need to convert relative paths or code references to appropriate CREF value.
parseNode.AstNode = new XText (unparsedAElementValue);
parseNode.AstNode = new XText (elementValue);
}
};
}

// Start to trim out unusable HTML elements/tags, but not any inner values
IgnorableElementDeclaration.Rule =
CreateStartElementIgnoreAttribute ("a", "name") + InlineDeclarations + CreateEndElement ("a", grammar, optional: true)
| CreateStartElementIgnoreAttribute ("a", "id") + InlineDeclarations + CreateEndElement ("a", grammar, optional: true)
;
IgnorableElementDeclaration.AstConfig.NodeCreator = (context, parseNode) => {
var aElementValue = new XText (parseNode.ChildNodes [1].AstNode.ToString () ?? string.Empty);
parseNode.AstNode = aElementValue;
};

CodeElementDeclaration.Rule = CreateStartElement ("code", grammar) + InlineDeclarations + CreateEndElement ("code", grammar);
CodeElementDeclaration.AstConfig.NodeCreator = (context, parseNode) => {
var target = parseNode.ChildNodes [1].AstNode;
parseNode.AstNode = new XElement ("c", target);
};
// Concatenates the string form of the AST nodes found beneath `parseNode`.
// A child that itself has more than one child contributes each grandchild's
// AstNode; any other child contributes its own AstNode. Null AstNodes add
// nothing to the result.
static string GetChildNodesAsString (ParseTreeNode parseNode)
{
	var text = string.Empty;
	foreach (var child in parseNode.ChildNodes) {
		var grandChildren = child.ChildNodes;
		if (grandChildren == null || grandChildren.Count <= 1) {
			// Zero or one nested node: use the child's own AST value.
			text += child.AstNode?.ToString ();
			continue;
		}
		foreach (var grandChild in grandChildren) {
			text += grandChild.AstNode?.ToString ();
		}
	}
	return text;
}

static IEnumerable<XElement> GetParagraphs (ParseTreeNodeList children)
Expand Down Expand Up @@ -184,13 +188,13 @@ static IEnumerable<XElement> GetParagraphs (ParseTreeNodeList children)
}
}

static XElement? TryParseHRef (string unparsedAElementValue, bool logError = false)
static XElement? TryParseElement (string unparsed, bool logError = false)
{
try {
return XElement.Parse ($"<see href={unparsedAElementValue}</see>");
return XElement.Parse (unparsed);
} catch (Exception x) {
if (logError)
Console.Error.WriteLine ($"## Unable to parse HTML element: <see href={unparsedAElementValue}</see>\n{x.GetType ()}: {x.Message}");
Console.Error.WriteLine ($"## Unable to parse HTML element: `{unparsed}`\n{x.GetType ()}: {x.Message}");
return null;
}
}
Expand Down Expand Up @@ -221,15 +225,12 @@ static IEnumerable<XElement> GetParagraphs (ParseTreeNodeList children)
public readonly NonTerminal PBlockDeclaration = new NonTerminal (nameof (PBlockDeclaration), ConcatChildNodes);
public readonly NonTerminal PreBlockDeclaration = new NonTerminal (nameof (PreBlockDeclaration), ConcatChildNodes);
public readonly NonTerminal InlineHyperLinkDeclaration = new NonTerminal (nameof (InlineHyperLinkDeclaration), ConcatChildNodes);
public readonly NonTerminal IgnorableElementDeclaration = new NonTerminal (nameof (IgnorableElementDeclaration), ConcatChildNodes);
public readonly NonTerminal CodeElementDeclaration = new NonTerminal (nameof (CodeElementDeclaration), ConcatChildNodes);

public readonly Terminal InlineHyperLinkOpenTerm = new RegexBasedTerminal ("<a href=", @"(?i)<a\s*href\s*=") {
// Start of an `<a ...=` element: case-insensitive "<a", optional whitespace, then any
// characters up to an "=". The terminal's own AST node is always the empty string.
// NOTE(review): `.*` is greedy, so the token presumably runs to the last "=" the regex
// can reach rather than the first attribute's "=" — confirm this is intended for
// href values that themselves contain "=" (e.g. query strings).
public readonly Terminal HtmlAElementStart = new RegexBasedTerminal ("<a attr=", @"(?i)<a\s*.*=") {
AstConfig = new AstNodeConfig {
NodeCreator = (context, parseNode) => parseNode.AstNode = "",
},
};

public readonly Terminal UnknownHtmlElementStart = new UnknownHtmlElementStartTerminal (nameof (UnknownHtmlElementStart)) {
AstConfig = new AstNodeConfig {
NodeCreator = (context, parseNode) => parseNode.AstNode = parseNode.Token.Value.ToString (),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ internal void CreateRules (SourceJavadocToXmldocGrammar grammar)
| LiteralDeclaration
| SeeDeclaration
| ValueDeclaration
| IgnorableDeclaration
| InlineParamDeclaration
| IgnorableDeclaration
;

CodeDeclaration.Rule = grammar.ToTerm ("{@code") + InlineValue + "}";
Expand Down Expand Up @@ -109,15 +109,6 @@ internal void CreateRules (SourceJavadocToXmldocGrammar grammar)
}
};

// Inline content may contain reserved characters with no tags or special parsing rules, do not throw when encountering them
IgnorableDeclaration.Rule = grammar.ToTerm ("@ ")
| grammar.ToTerm ("{")
| grammar.ToTerm ("}")
;
IgnorableDeclaration.AstConfig.NodeCreator = (context, parseNode) => {
parseNode.AstNode = new XText (parseNode.ChildNodes [0].Term.Name.Trim ());
};

InlineParamDeclaration.Rule = grammar.ToTerm ("{@param") + InlineValue + "}";
InlineParamDeclaration.AstConfig.NodeCreator = (context, parseNode) => {
var target = parseNode.ChildNodes [1].AstNode;
Expand Down Expand Up @@ -156,9 +147,37 @@ internal void CreateRules (SourceJavadocToXmldocGrammar grammar)
// https://docs.oracle.com/javase/7/docs/technotes/tools/windows/javadoc.html#value
public readonly NonTerminal ValueDeclaration = new NonTerminal (nameof (ValueDeclaration));

public readonly NonTerminal IgnorableDeclaration = new NonTerminal (nameof (IgnorableDeclaration));

public readonly NonTerminal InlineParamDeclaration = new NonTerminal (nameof (InlineParamDeclaration));

// Terminal backed by IgnorableCharTerminal. Its AST node is the matched token's
// value converted to a string.
public readonly Terminal IgnorableDeclaration = new IgnorableCharTerminal (nameof (IgnorableDeclaration)) {
AstConfig = new AstNodeConfig {
NodeCreator = (context, parseNode) => parseNode.AstNode = parseNode.Token.Value.ToString (),
},
};

}
}

// Terminal that consumes exactly one '@', '{', or '}' character and emits it as a token.
class IgnorableCharTerminal : Terminal
{
	public IgnorableCharTerminal (string name)
		: base (name)
	{
		// One step below TerminalPriority.Low; presumably so that every other
		// terminal gets a chance to match first — confirm against Irony's scanner.
		Priority = TerminalPriority.Low - 1;
	}

	// Returns a one-character token when the character at the current source
	// location is '@', '{', or '}'; otherwise returns null without advancing.
	public override Token? TryMatch (ParsingContext context, ISourceStream source)
	{
		var current = source.Text [source.Location.Position];
		switch (current) {
		case '@':
		case '{':
		case '}':
			source.PreviewPosition += 1;
			return source.CreateToken (OutputTerminal, current);
		default:
			return null;
		}
	}

}
}
Original file line number Diff line number Diff line change
Expand Up @@ -69,19 +69,12 @@ public void HyperLinkDeclaration ()

r = p.Parse ("<a href=\"AutofillService.html#FieldClassification\">field classification</a>");
Assert.IsFalse (r.HasErrors (), DumpMessages (r, p));
Assert.AreEqual ("\"AutofillService.html#FieldClassification\"&gt;field classification",
r.Root.AstNode.ToString ());
}
Assert.AreEqual ("field classification", r.Root.AstNode.ToString ());

[Test]
public void CodeElementDeclaration ()
{
var p = CreateParser (g => g.HtmlTerms.CodeElementDeclaration);

var r = p.Parse ("<code>input.position()</code>");
r = p.Parse ("<a href='https://material.io/guidelines/components/progress-activity.html#progress-activity-types-of-indicators'>\nProgress & activity</a>");
Assert.IsFalse (r.HasErrors (), DumpMessages (r, p));
Assert.AreEqual ("<c>input.position()</c>", r.Root.AstNode.ToString ());
Assert.AreEqual ("<see href=\"https://material.io/guidelines/components/progress-activity.html#progress-activity-types-of-indicators\">\nProgress &amp; activity</see>",
r.Root.AstNode.ToString ());
}

}
}
Original file line number Diff line number Diff line change
Expand Up @@ -144,16 +144,16 @@ more description here.</para>
</member>",
},
new ParseResult {
Javadoc = "Something {@link #method}: description, \"<code>declaration</code>\" or \"<code>another declaration</code>\".\n\n@apiSince 1\n",
Javadoc = "Something {@link #method}: description.\n\n@apiSince 1\n",
FullXml = @"<member>
<summary>Something <c>#method</c>: description, ""<c>declaration</c>"" or ""<c>another declaration</c>"".</summary>
<summary>Something <c>#method</c>: description.</summary>
<remarks>
<para>Something <c>#method</c>: description, ""<c>declaration</c>"" or ""<c>another declaration</c>"".</para>
<para>Something <c>#method</c>: description.</para>
<para>Added in API level 1.</para>
</remarks>
</member>",
IntelliSenseXml = @"<member>
<summary>Something <c>#method</c>: description, ""<c>declaration</c>"" or ""<c>another declaration</c>"".</summary>
<summary>Something <c>#method</c>: description.</summary>
</member>",
},
new ParseResult {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ public sealed class JavadocInfo {
public XElement[] Copyright { get; set; }

public XmldocStyle XmldocStyle { get; set; }

public string DocRootReplacement { get; set; }

string MemberDescription;
Expand Down Expand Up @@ -202,6 +203,7 @@ static void AddNode (ICollection<string> comments, XNode node)
if (node == null)
return;
var contents = node.ToString ();
contents = FixUpHtml (contents);

var lines = new StringReader (contents);
string line;
Expand All @@ -210,6 +212,43 @@ static void AddNode (ICollection<string> comments, XNode node)
}
}

// Entity-encoded HTML tags and the text each one is rewritten to by FixUpHtml.
// Kept as a static, ordered table so it is built once instead of on every call.
static readonly (string Encoded, string Replacement)[] HtmlFixUps = {
	("&lt;blockquote&gt;", ""), ("&lt;/blockquote&gt;", ""),
	("&lt;cite&gt;", "<i>"), ("&lt;/cite&gt;", "</i>"),
	("&lt;code&gt;", "<c>"), ("&lt;/code&gt;", "</c>"),
	("&lt;dd&gt;", ""), ("&lt;/dd&gt;", ""),
	("&lt;dl&gt;", ""), ("&lt;/dl&gt;", ""),
	("&lt;dt&gt;", ""), ("&lt;/dt&gt;", ""),
	("&lt;em&gt;", "<i>"), ("&lt;/em&gt;", "</i>"),
	("&lt;h1&gt;", ""), ("&lt;/h1&gt;", ""),
	("&lt;h2&gt;", ""), ("&lt;/h2&gt;", ""),
	("&lt;h3&gt;", ""), ("&lt;/h3&gt;", ""),
	("&lt;h4&gt;", ""), ("&lt;/h4&gt;", ""),
	("&lt;h5&gt;", ""), ("&lt;/h5&gt;", ""),
	("&lt;h6&gt;", ""), ("&lt;/h6&gt;", ""),
	("&lt;li&gt;", ""), ("&lt;/li&gt;", ""),
	("&lt;ol&gt;", ""), ("&lt;/ol&gt;", ""),
	("&lt;strong&gt;", "<b>"), ("&lt;/strong&gt;", "</b>"),
	("&lt;sub&gt;", ""), ("&lt;/sub&gt;", ""),
	("&lt;sup&gt;", ""), ("&lt;/sup&gt;", ""),
	// Only the opening "&lt;table" prefix is decoded, so attributes may follow it.
	("&lt;table", "<table"), ("&lt;/table&gt;", "</table>"),
	("&lt;tbody&gt;", "<tbody>"), ("&lt;/tbody&gt;", "</tbody>"),
	("&lt;td&gt;", "<td>"), ("&lt;/td&gt;", "</td>"),
	("&lt;th&gt;", "<th>"), ("&lt;/th&gt;", "</th>"),
	("&lt;thead&gt;", "<thead>"), ("&lt;/thead&gt;", "</thead>"),
	("&lt;tr&gt;", "<tr>"), ("&lt;/tr&gt;", "</tr>"),
	("&lt;ul&gt;", ""), ("&lt;/ul&gt;", ""),
};

// Remove, replace, or decode common HTML tags to improve what is displayed in the IDE and online.
//
// `javadocContent` is the serialized node text in which HTML tags appear entity-encoded
// (e.g. "&lt;code&gt;"). Returns the text with every entry of HtmlFixUps applied; input
// that is null, empty, or contains no "&lt;" is returned unchanged.
static string FixUpHtml (string javadocContent)
{
	// Every encoded tag starts with "&lt;", so without one there is nothing to
	// rewrite and the replacement passes can be skipped.
	if (string.IsNullOrEmpty (javadocContent) || !javadocContent.Contains ("&lt;"))
		return javadocContent;

	foreach (var (encoded, replacement) in HtmlFixUps) {
		javadocContent = javadocContent.Replace (encoded, replacement);
	}
	return javadocContent;
}

static void PrintMessages (ParseTree tree, TextWriter writer)
{
var lines = GetLines (tree.SourceText);
Expand Down

0 comments on commit d442c15

Please sign in to comment.