Dump CSS extracts as JSON files (#116)
Following discussions in #113, this update dumps the extracted CSS definitions
as JSON files (instead of `.cvds` files) in the `css` folder. Each file contains
a JSON object with `properties`, `descriptors` and `valuespaces` keys that list
the extracted definitions. A typical example:

```json
{
  "properties": {
    "animation-timeline": {
      "name": "animation-timeline",
      "value": "<single-animation-timeline>#",
      "initial": "auto",
      "appliesTo": "all elements, ::before and ::after pseudo-elements",
      "inherited": "none",
      "percentages": "N/A",
      "media": "interactive",
      "computedValue": "As specified",
      "canonicalOrder": "per grammar",
      "animatable": "no"
    }
  },
  "descriptors": {},
  "valuespaces": {
    "<single-animation-timeline>": {
      "value": "auto | scroll([element(<id-selector>)[, <scroll-direction>[, <scroll-offset>[, <scroll-offset>[, <time>[, <single-animation-fill-mode>]]]]]])"
    },
    "<scroll-direction>": {
      "value": "auto | block | inline | horizontal | vertical"
    },
    "<scroll-offset>": {
      "value": "<length> | <percentage> | auto"
    }
  }
}
```
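
To give an idea of how these dumps can be consumed, here is a minimal Node.js sketch that reads one of the generated files and lists the definitions it contains. The `css/css-animations-2.json` file name is only an assumption for illustration; actual file names follow the spec shortnames computed by the crawler.

```js
const fs = require('fs');
const path = require('path');

// Read one of the generated CSS extracts (hypothetical file name)
const dump = JSON.parse(fs.readFileSync(
  path.join('css', 'css-animations-2.json'), 'utf8'));

// List property definitions and their value syntax
Object.values(dump.properties || {}).forEach(prop => {
  console.log(`${prop.name}: ${prop.value || prop.newValues}`);
});

// Value spaces (non-terminals) are keyed by their <name>
Object.entries(dump.valuespaces || {}).forEach(([name, vs]) => {
  console.log(`${name} = ${vs.value}`);
});
```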

The update also makes sure that the keys of the extracted definitions
(e.g. `appliesTo`, `newValues`) follow a lower camelCase convention.
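
Concretely, the conversion turns the labels of CSS definition tables into JSON keys. The sketch below reproduces the `dfnLabel2Property` helper added in `src/cli/extract-cssdfn.js` (see the diff further down), together with a couple of example conversions:

```js
// Turn a definition table label into a lower camelCase key,
// e.g. "Applies to" -> "appliesTo", "New values" -> "newValues"
const dfnLabel2Property = label => label.trim()
  .replace(/:/, '')
  .split(' ')
  .map((str, idx) => (idx === 0) ?
    str.toLowerCase() :
    str.charAt(0).toUpperCase() + str.slice(1))
  .join('');

console.log(dfnLabel2Property('Applies to:'));      // "appliesTo"
console.log(dfnLabel2Property('Canonical order:')); // "canonicalOrder"
```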

When multiple levels of a spec exist, only the contents of the latest level
are dumped for now. A bug in the code made it miss some cases (where the URLs
of the different levels did not follow the same pattern), which could create
file-writing conflicts; this should now be fixed.

Also, the logic that creates CSS and IDL dumps is now separate, so that each
dump targets the latest level that actually contains related definitions (for
instance, CSS Animations Level 2 is currently a delta spec and does not define
any IDL, whereas Level 1 does). Whether we will want to keep that behavior for
such delta specs later on is an open question.
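
In practice, the level of a spec is read from the trailing number in its URL (e.g. `.../css-animations-2/`), and the latest level is computed per content type among the crawled specs that actually carry that type of content. Here is a simplified sketch of that selection, assuming crawl results with `url` and `flags` fields as in the diff below; it ignores the CSS 2.1 / CSS 2.2 special case that the real `isLatestLevel` helper handles:

```js
// Keep only the specs that are the most recent level for a given
// content type ('css' or 'idl') among the crawl results.
function latestLevels(data, flag) {
  const level = url => {
    const m = url.match(/-(\d)\/$/);
    return m ? Number(m[1]) : 0;
  };
  const series = url => url.replace(/-\d\/$/, '/');
  return data.filter(spec => spec.flags[flag] &&
    !data.some(other => other.flags[flag] &&
      series(other.url) === series(spec.url) &&
      level(other.url) > level(spec.url)));
}

// With css-animations-1 (CSS + IDL) and css-animations-2 (CSS-only delta),
// latestLevels(data, 'idl') keeps level 1 and latestLevels(data, 'css') keeps level 2.
```
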
tidoust authored Aug 13, 2018
1 parent d87195f commit cea5c00
Showing 2 changed files with 101 additions and 81 deletions.
148 changes: 76 additions & 72 deletions src/cli/crawl-specs.js
@@ -209,14 +209,14 @@ async function crawlSpec(spec, crawlOptions) {
.then(css => {
Object.keys(css.properties || {}).forEach(prop => {
try {
- css.properties[prop].parsedValue = cssDfnParser.parsePropDefValue(css.properties[prop].Value || css.properties[prop]["New values"]);
+ css.properties[prop].parsedValue = cssDfnParser.parsePropDefValue(css.properties[prop].value || css.properties[prop].newValues);
} catch (e) {
css.properties[prop].valueParseError = e.message;
}
});
Object.keys(css.descriptors || {}).forEach(desc => {
try {
- css.descriptors[desc].parsedValue = cssDfnParser.parsePropDefValue(css.descriptors[desc].Value);
+ css.descriptors[desc].parsedValue = cssDfnParser.parsePropDefValue(css.descriptors[desc].value);
} catch (e) {
css.descriptors[desc].valueParseError = e.message;
}
@@ -430,6 +430,11 @@ function getShortname(spec) {
* Note results are sorted by URL to guarantee that the crawl report produced
* will always follow the same order.
*
+ * The function also dumps raw CSS/IDL extracts for each spec to the css and
+ * idl folders. Note that if the crawl contains multiple levels of a given spec
+ * that contain the same type of definitions (css, or idl), the dump is for the
+ * latest level.
+ *
* @function
* @param {Object} crawlInfo Crawl information structure, contains the title
* and the list of specs to crawl
@@ -455,82 +460,81 @@ async function saveResults(crawlInfo, crawlOptions, data, folder) {
}));
});

- const saveCssAndIdl = async spec => {
- if (spec.flags.idl && spec.idl && spec.idl.idl) {
- let idlHeader = `
- // GENERATED CONTENT - DO NOT EDIT
- // Content was automatically extracted by Reffy into reffy-reports
- // (https://github.com/tidoust/reffy-reports)
- // Source: ${spec.title} (${spec.crawled})`;
- idlHeader = idlHeader.replace(/^\s+/gm, '').trim() + '\n\n';
- let idl = spec.idl.idl
- .replace(/\s+$/gm, '\n')
- .replace(/\t/g, ' ')
- .trim();
- idl = idlHeader + idl + '\n';
- delete spec.idl.idl;
- await new Promise(resolve => fs.writeFile(
- path.join(idlFolder, getShortname(spec) + '.idl'),
- idl,
- err => {
- if (err) console.log(err);
- return resolve();
- }));
- }
+ const saveIdl = async spec => {
+ let idlHeader = `
+ // GENERATED CONTENT - DO NOT EDIT
+ // Content was automatically extracted by Reffy into reffy-reports
+ // (https://github.com/tidoust/reffy-reports)
+ // Source: ${spec.title} (${spec.crawled})`;
+ idlHeader = idlHeader.replace(/^\s+/gm, '').trim() + '\n\n';
+ let idl = spec.idl.idl
+ .replace(/\s+$/gm, '\n')
+ .replace(/\t/g, ' ')
+ .trim();
+ idl = idlHeader + idl + '\n';
+ delete spec.idl.idl;
+ await new Promise(resolve => fs.writeFile(
+ path.join(idlFolder, getShortname(spec) + '.idl'),
+ idl,
+ err => {
+ if (err) console.log(err);
+ return resolve();
+ }));
+ };

- if (spec.flags.css && spec.css && (
- (Object.keys(spec.css.properties || {}).length > 0) ||
- (Object.keys(spec.css.descriptors || {}).length > 0) ||
- (Object.keys(spec.css.valuespaces || {}).length > 0))) {
- let properties = (Object.values(spec.css.properties || {}))
- .filter(s => s.Name && (s.Value || s['New values']))
- .map(s => s.Value ? `${s.Name} = ${s.Value}` :
- `${s.Name} |= ${s['New values']}`);
- let descriptors = (Object.values(spec.css.descriptors || {}))
- .filter(s => s.Name && (s.Value || s['New values']))
- .map(s => s.Value ? `${s.Name} = ${s.Value}` :
- `${s.Name} |= ${s['New values']}`);
- let valuespaces = (Object.keys(spec.css.valuespaces || {}))
- .filter(s => spec.css.valuespaces[s].value)
- .map(s => `${s} = ${spec.css.valuespaces[s].value}`);
- let parts = properties.concat(descriptors, valuespaces);
-
- let css = parts.join('\n\n')
- .replace(/\s+$/gm, '\n')
- .replace(/\t/g, ' ')
- .trim();
- css = css + '\n';
- await new Promise(resolve => fs.writeFile(
- path.join(cssFolder, getShortname(spec) + '.cvds'),
- css,
- err => {
- if (err) console.log(err);
- return resolve();
- }));
+ const saveCss = async spec => {
+ let css = JSON.stringify(spec.css, (key, val) => {
+ if ((key === 'parsedValue') || (key === 'valueParseError')) {
+ return undefined;
+ }
+ else {
+ return val;
+ }
+ }, 2) + '\n';
+ await new Promise(resolve => fs.writeFile(
+ path.join(cssFolder, getShortname(spec) + '.json'),
+ css,
+ err => {
+ if (err) console.log(err);
+ return resolve();
+ }));
+ };

+ // Helper function that returns true when the given spec is is the latest
+ // level of that spec in the crawl for the given type of content
+ // ("css" or "idl")
+ const isLatestLevel = (spec, flag) => {
+ if (!spec.url.match(/-\d\/$/)) {
+ // Handle special CSS 2.1 / CSS 2.2 spec which does not
+ // follow the same naming conventions as other CSS specs
+ return !spec.url.match(/CSS2\/$/i) ||
+ !data.find(s => s.url.match(/CSS22\/$/i));
+ }
+ let level = spec.url.match(/-(\d)\/$/)[1];
+ let moreRecent = data.find(s =>
+ s.flags[flag] &&
+ (getShortname(s) === getShortname(spec)) &&
+ s.url.match(/-\d\/$/) &&
+ (s.url.match(/-(\d)\/$/)[1] > level));
+ return !moreRecent;
+ };

- // Only save CSS/IDL definitions for the last level of specifications
- // when the crawl contains multiple levels
- // (Note the code below assumes that levels are below 10)
+ // Save IDL dumps for the latest level of a spec to the idl folder
await Promise.all(data
- .filter(spec => {
- if (!spec.url.match(/-\d\/$/)) {
- // Handle special CSS 2.1 / CSS 2.2 spec which does not
- // follow the same naming conventions as other CSS specs
- return !spec.url.match(/CSS2\/$/i) ||
- !data.find(s => s.url.match(/CSS22\/$/i));
- }
- let start = spec.url.split(/-\d\/$/)[0];
- let level = spec.url.match(/-(\d)\/$/)[1];
- let moreRecent = data.find(s =>
- s.url.startsWith(start) &&
- s.url.match(/-\d\/$/) &&
- (s.url.match(/-(\d)\/$/)[1] > level));
- return !moreRecent;
- })
- .map(saveCssAndIdl));
+ .filter(spec => spec.flags.idl && spec.idl && spec.idl.idl)
+ .filter(spec => isLatestLevel(spec, 'idl'))
+ .map(saveIdl));

+ // Save CSS dumps for the latest level of a spec to the css folder
+ await Promise.all(data
+ .filter(spec => spec.flags.css && spec.css && (
+ (Object.keys(spec.css.properties || {}).length > 0) ||
+ (Object.keys(spec.css.descriptors || {}).length > 0) ||
+ (Object.keys(spec.css.valuespaces || {}).length > 0)))
+ .filter(spec => isLatestLevel(spec, 'css'))
+ .map(saveCss));

// Save all results to the crawl.json file
let reportFilename = path.join(folder, 'crawl.json');
return new Promise((resolve, reject) =>
fs.readFile(reportFilename, function(err, content) {
34 changes: 25 additions & 9 deletions src/cli/extract-cssdfn.js
@@ -17,6 +17,22 @@ const urlOrDom = require('../lib/util').urlOrDom;
const getDocumentAndGenerator = require('../lib/util').getDocumentAndGenerator;


+ /**
+ * Converts a definition label as it appears in a CSS spec to a lower camel
+ * case property name.
+ *
+ * @param {String} label Definition label
+ * @return {String} lower camel case property name for the label
+ */
+ const dfnLabel2Property = label => label.trim()
+ .replace(/:/, '')
+ .split(' ')
+ .map((str, idx) => (idx === 0) ?
+ str.toLowerCase() :
+ str.charAt(0).toUpperCase() + str.slice(1))
+ .join('');
+
+
/**
* Extract a CSS definition from a table
*
@@ -26,7 +42,7 @@ const extractTableDfn = table => {
let res = {};
const lines = [...table.querySelectorAll('tr')]
.map(line => Object.assign({
- name: line.querySelector(':first-child').textContent.trim().replace(/:/, ''),
+ name: dfnLabel2Property(line.querySelector(':first-child').textContent),
value: line.querySelector('td:last-child').textContent.trim().replace(/\s+/g, ' ')
}));
for (let prop of lines) {
@@ -43,10 +59,10 @@
*/
const extractDlDfn = dl => {
let res = {};
- res.Name = dl.querySelector('dt').textContent.replace(/'/g, '').trim();
+ res.name = dl.querySelector('dt').textContent.replace(/'/g, '').trim();
const lines = [...dl.querySelectorAll('dd table tr')]
.map(line => Object.assign({
- name: line.querySelector(':first-child').textContent.trim().replace(/:/, ''),
+ name: dfnLabel2Property(line.querySelector(':first-child').textContent),
value: line.querySelector('td:last-child').textContent.trim().replace(/\s+/g, ' ')
}));
for (let prop of lines) {
@@ -63,17 +79,17 @@ const extractDfns = (doc, selector, extractor) => {
let res = {};
[...doc.querySelectorAll(selector)]
.map(extractor)
- .filter(dfn => !!dfn.Name)
- .map(dfn => dfn.Name.split(',').map(name => Object.assign({},
- dfn, { Name: name.trim() })))
+ .filter(dfn => !!dfn.name)
+ .map(dfn => dfn.name.split(',').map(name => Object.assign({},
+ dfn, { name: name.trim() })))
.reduce((acc, val) => acc.concat(val), [])
.forEach(dfn => {
- if ((dfn.Name === 'property-name') ||
- (dfn.Name === '--*')) {
+ if ((dfn.name === 'property-name') ||
+ (dfn.name === '--*')) {
// Ignore sample definition && custom properties definition
return;
}
- res[dfn.Name] = dfn;
+ res[dfn.name] = dfn;
});
return res;
};
