Skip to content

Commit

Permalink
[Java.Interop.Tools.JavaSource] Support html tags with attributes (#1286)
Browse files Browse the repository at this point in the history

Context: dotnet/android#9647

dotnet/android#9647 attempted to import API docs for API 35, and
produced the following warning:

	The following issues were found, review the build log for more details:
	>   ## Unable to translate remarks for android/app/admin/DevicePolicyManager:
	>   JavadocImport-Error (31:39): Syntax error, expected: </p>, </P>, #PCDATA, <tt>, <TT>, <i>, <I>, <a attr=, <code>, {@code, {@docroot}, {@inheritdoc}, {@link, {@linkplain, {@literal, {@see, {@value}, {@value, IgnorableDeclaration, {@param, UnknownHtmlElementStart, <p>, <P>, <pre , @author, @apiSince, @deprecated, @deprecatedSince, @exception, @inheritdoc, @hide, @param, @return, @see, @Serialdata, @serialField, @SInCE, @throws, @[unknown], @Version
	    <li>A <i id="deviceowner">Device Owner</i>, which only ever exists on the
	                                          ^

Parsing fails here because the `<i>` tag has an `id` attribute
_and_ is nested inside a still-open `<p>` element.

Turns Out™ that HTML allows attributes on nearly *everything*; e.g.
from [§3.2.3 Global attributes][0]: 

> The following attributes are common to and may be specified on all
> [HTML elements](https://dev.w3.org/html5/spec-LC/infrastructure.html#html-elements)
> (even those not defined in this specification):
>   * …
>   * `id`

Given this, it doesn't make sense for `CreateStartElement()` to
reject attributes.  Update `CreateStartElement()` so that *all*
elements accept and *ignore* any specified attributes (by default),
which allows `<i id="deviceowner">Device Owner</i>` to parse.

The regex used has also been improved to include word boundaries
(`\b`) around the tag name so that it does not match unexpected
elements; e.g. without the boundary, the pattern for `<p` followed by
`[^>]*>` would also match `<pre>`.

[0]: https://dev.w3.org/html5/spec-LC/elements.html#global-attributes
  • Loading branch information
pjcollins authored Jan 7, 2025
1 parent 2c06b3c commit fe00cef
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 18 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ internal void CreateRules (SourceJavadocToXmldocGrammar grammar)
var fontstyle_i = CreateHtmlToCrefElement (grammar, "i", "i", InlineDeclarations, optionalEnd: true);

var preText = new PreBlockDeclarationBodyTerminal ();
PreBlockDeclaration.Rule = CreateStartElementIgnoreAttribute ("pre") + preText + CreateEndElement ("pre", grammar, optional: true);
PreBlockDeclaration.Rule = CreateStartElement ("pre") + preText + CreateEndElement ("pre", grammar, optional: true);
PreBlockDeclaration.AstConfig.NodeCreator = (context, parseNode) => {
if (!grammar.ShouldImport (ImportJavadoc.Remarks)) {
parseNode.AstNode = "";
Expand All @@ -82,7 +82,7 @@ internal void CreateRules (SourceJavadocToXmldocGrammar grammar)
FontStyleDeclaration.Rule = fontstyle_tt | fontstyle_i;

PBlockDeclaration.Rule =
CreateStartElement ("p", grammar) + InlineDeclarations + CreateEndElement ("p", grammar, optional:true)
CreateStartElement ("p") + InlineDeclarations + CreateEndElement ("p", grammar, optional:true)
;
PBlockDeclaration.AstConfig.NodeCreator = (context, parseNode) => {
var remarks = FinishParse (context, parseNode).Remarks;
Expand Down Expand Up @@ -260,7 +260,7 @@ static string GetChildNodesAsString (ParseTreeNode parseNode)

static NonTerminal CreateHtmlToCrefElement (Grammar grammar, string htmlElement, string crefElement, BnfTerm body, bool optionalEnd = false)
{
var start = CreateStartElement (htmlElement, grammar);
var start = CreateStartElement (htmlElement);
var end = CreateEndElement (htmlElement, grammar, optionalEnd);
var nonTerminal = new NonTerminal ("<" + htmlElement + ">", ConcatChildNodes) {
Rule = start + body + end,
Expand All @@ -275,28 +275,15 @@ static NonTerminal CreateHtmlToCrefElement (Grammar grammar, string htmlElement,
return nonTerminal;
}

static NonTerminal CreateStartElement (string startElement, Grammar grammar)
static RegexBasedTerminal CreateStartElement (string startElement, string attribute = "")
{
var start = new NonTerminal ("<" + startElement + ">", nodeCreator: (context, parseNode) => parseNode.AstNode = "") {
Rule = grammar.ToTerm ("<" + startElement + ">") | "<" + startElement.ToUpperInvariant () + ">",
};
return start;
}

static RegexBasedTerminal CreateStartElementIgnoreAttribute (string startElement, string attribute)
{
return new RegexBasedTerminal ($"<{startElement} {attribute}", $@"(?i)<{startElement}\s*{attribute}[^>]*>") {
return new RegexBasedTerminal ($"<{startElement} {attribute}>", $@"(?i)<\b{startElement}\b\s*{attribute}[^>]*>") {
AstConfig = new AstNodeConfig {
NodeCreator = (context, parseNode) => parseNode.AstNode = "",
},
};
}

static RegexBasedTerminal CreateStartElementIgnoreAttribute (string startElement)
{
return CreateStartElementIgnoreAttribute (startElement, "");
}

static NonTerminal CreateEndElement (string endElement, Grammar grammar, bool optional = false)
{
var end = new NonTerminal (endElement, nodeCreator: (context, parseNode) => parseNode.AstNode = "") {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ public void PBlockDeclaration ()
r = p.Parse("<p>r= <em>unknown</em> text");
Assert.IsFalse (r.HasErrors (), DumpMessages (r, p));
Assert.AreEqual ("<para>r= &lt;em&gt;unknown&lt;/em&gt; text</para>", r.Root.AstNode.ToString ());

r = p.Parse ("<p>For <li>A <i id=\"deviceowner\">Device Owner</i>");
Assert.IsFalse (r.HasErrors (), DumpMessages (r, p));
Assert.AreEqual ("<para>For &lt;li&gt;A <i>Device Owner</i></para>", r.Root.AstNode.ToString ());
}

[Test]
Expand Down

0 comments on commit fe00cef

Please sign in to comment.