Skip to content

Commit

Permalink
Ignore dfns that have an invalid type (#659)
Browse files Browse the repository at this point in the history
This update restricts dfns extraction to definitions that have a valid type,
as defined in:
https://tabatkins.github.io/bikeshed/#dfn-types
(+ `namespace` and `event` that are valid but do not yet appear in the doc)

In practice, the only invalid type that appears in Editor's Drafts is `idl`,
which ReSpec uses to flag internal slots. To avoid losing definitions while
ReSpec gets fixed and while that fix propagates to all Editor's Drafts and /TR
versions of the specs, the code automatically converts `idl` definition types
to `method` (when the first linking text ends with `)`) or `attribute`.

Data validation is something that we would typically do at a later stage,
through anomaly reports, or through patches as we produce packages. That said,
dfns are more time-sensitive than other types of data that Reffy extracts from
the specs, and validating them at a later stage would need to be semi-manual,
which is why invalid types are filtered out during extraction instead.

Dfns with an invalid type get reported as warnings to the console.

Fixes #658.
  • Loading branch information
tidoust authored Jun 28, 2021
1 parent b92cb6e commit c902c1e
Show file tree
Hide file tree
Showing 3 changed files with 105 additions and 3 deletions.
87 changes: 84 additions & 3 deletions src/browserlib/extract-dfns.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,77 @@ import {parse} from "../../node_modules/webidl2/index.js";
* @return {Array(Object)} An Array of definitions
*/

function definitionMapper(el, idToHeading) {
function normalize(str) {
return str.trim().replace(/\s+/g, ' ');
/**
 * Normalize whitespace in a string: strip leading and trailing whitespace
 * and collapse every inner run of whitespace into a single space.
 *
 * @param {String} str The string to normalize
 * @return {String} The normalized string
 */
function normalize(str) {
  const trimmed = str.trim();
  const collapsed = trimmed.replace(/\s+/g, ' ');
  return collapsed;
}

/**
 * Tell whether a definition element has a valid definition type.
 *
 * Valid types are those defined in
 * https://tabatkins.github.io/bikeshed/#dfn-types
 * (+ "namespace" and "event" which are not yet in the doc).
 *
 * An element without a `data-dfn-type` attribute is treated as a "dfn".
 * A "[reffy]" warning is written to the console for each element whose type
 * is not in the list.
 *
 * @param {Element} el The definition element to check
 * @return {Boolean} true when the type is valid, false otherwise
 */
function hasValidType(el) {
  const knownTypes = new Set([
    // CSS types
    'property', 'descriptor', 'value', 'type', 'at-rule', 'function',
    'selector',

    // Web IDL types
    'namespace', 'interface', 'constructor', 'method', 'argument',
    'attribute', 'callback', 'dictionary', 'dict-member', 'enum',
    'enum-value', 'exception', 'const', 'typedef', 'stringifier',
    'serializer', 'iterator', 'maplike', 'setlike', 'extended-attribute',
    'event',

    // Element types
    'element', 'element-state', 'element-attr', 'attr-value',

    // URL scheme
    'scheme',

    // HTTP header
    'http-header',

    // Grammar type
    'grammar',

    // "English" terms
    'abstract-op', 'dfn'
  ]);

  // A missing attribute means a regular "dfn"
  const dfnType = el.getAttribute('data-dfn-type') ?? 'dfn';
  if (knownTypes.has(dfnType)) {
    return true;
  }

  console.warn('[reffy]', `"${dfnType}" is an invalid dfn type for "${normalize(el.textContent)}"`);
  return false;
}


function definitionMapper(el, idToHeading) {
let definedIn = 'prose';
const enclosingEl = el.closest('dt,pre,table,h1,h2,h3,h4,h5,h6,.note,.example') || el;
switch (enclosingEl.nodeName) {
Expand Down Expand Up @@ -140,6 +206,21 @@ export default function (spec, idToHeading = {}) {
}

// Build the list of definitions in three passes over the matching elements:
// 1. rewrite the invalid "idl" type in place,
// 2. drop elements whose type is still invalid,
// 3. map each remaining element to its definition object.
return [...document.querySelectorAll(definitionsSelector)]
.map(node => {
// 2021-06-21: Temporary preprocessing of invalid "idl" dfn type (used for
// internal slots) while fix for https://github.com/w3c/respec/issues/3644
// propagates to all EDs and /TR specs. To be dropped once crawls no
// longer produce warnings.
if (node.getAttribute('data-dfn-type') === 'idl') {
// Linking text comes from the "|"-separated data-lt attribute when there
// is one, from the element's text content otherwise.
const linkingText = node.hasAttribute('data-lt') ?
node.getAttribute('data-lt').split('|').map(normalize) :
[normalize(node.textContent)];
// A first linking text that ends with ")" is taken to be a method, anything
// else an attribute. Note this updates the attribute on the DOM node itself.
node.setAttribute('data-dfn-type', linkingText[0].endsWith(')') ? 'method' : 'attribute');
console.warn('[reffy]', `Fixed invalid "idl" dfn type "${normalize(node.textContent)}"`);
}
return node;
})
// Must run after the "idl" fix above, otherwise "idl" dfns would be dropped
.filter(hasValidType)
.map(node => definitionMapper(node, idToHeading));
}

Expand Down
9 changes: 9 additions & 0 deletions src/lib/util.js
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,15 @@ async function processSpecification(spec, callback, args, counter) {
await isReady();
});

// Capture and report Reffy's browserlib warnings: relay console messages
// from the page that start with "[reffy] ", tagged with the spec URL and the
// console message type, with the prefix removed. Other messages are ignored.
page.on('console', msg => {
  const prefix = '[reffy] ';
  const text = msg.text();
  if (text.startsWith(prefix)) {
    // slice() instead of the deprecated substr(), reusing the text read above
    console.warn(spec.url, `[${msg.type()}]`, text.slice(prefix.length));
  }
});

// Capture and report when page throws an error
page.on('pageerror', err => {
console.error(err);
});
Expand Down
12 changes: 12 additions & 0 deletions tests/extract-dfns.js
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,18 @@ const tests = [
html: "<dfn data-lt='foo \n |\nbar' id=foo>Foo</dfn>",
changesToBaseDfn: [{linkingText: ["foo", "bar"]}]
},
{title: "ignores dfns with an invalid data-dfn-type",
html: "<dfn id=foo data-dfn-type=invalidtype>Foo</dfn>",
changesToBaseDfn: []
},
{title: "automatically fixes internal slots dfns with an invalid 'idl' data-dfn-type",
html: "<dfn id=foo data-dfn-type=idl>Foo</dfn>",
changesToBaseDfn: [{type: "attribute", access: "public"}]
},
{title: "automatically fixes internal methods with an invalid 'idl' data-dfn-type",
html: "<dfn id=foo data-dfn-type=idl>Foo()</dfn>",
changesToBaseDfn: [{ linkingText: [ 'Foo()' ], type: "method", access: "public"}]
},
{title: "handles HTML spec conventions of definitions in headings",
html: '<h6 id="parsing-main-inselect"><span class="secno">12.2.6.4.16</span> The "<dfn>in select</dfn>" insertion mode<a href="#parsing-main-inselect" class="self-link"></a></h6>',
changesToBaseDfn: [{id: "parsing-main-inselect",
Expand Down

0 comments on commit c902c1e

Please sign in to comment.