Skip to content
This repository was archived by the owner on Oct 22, 2022. It is now read-only.

HTMLFileContent and component for extracting IDL fragments #46

Merged
merged 27 commits into from
Jun 27, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
dd775b5
WIP: Adding HTML Content parsing
m-cheung May 23, 2017
58e5e48
Adding preliminary HTMLParsing and corresponding tests
m-cheung May 29, 2017
32366c5
Merge branch 'htmlContent'
m-cheung May 29, 2017
6376b3d
Renaming HTMLParser to HTMLFileContent and adding additional tests
m-cheung May 30, 2017
959d608
Removing additional files
m-cheung May 30, 2017
b35f750
Adding some additional tests and filtering out examples / notes
m-cheung May 30, 2017
853d12d
Adding additional tests
m-cheung May 30, 2017
0f92a6b
Temporary work on pipeline
m-cheung May 31, 2017
b640239
Merge branch 'master' into htmlContent
m-cheung Jun 1, 2017
e9eb44d
Merge branch 'master' into htmlContent
m-cheung Jun 6, 2017
5bcfc90
Merge branch 'master' into htmlContent
m-cheung Jun 6, 2017
a041087
Refactoring HTMLFileContent to correspond with HTMLLexer changes
m-cheung Jun 7, 2017
5969d4b
Part 2 of refactor due to HTMLLexer changes
m-cheung Jun 7, 2017
ad7669e
Renaming components
m-cheung Jun 8, 2017
dd2bd1d
Removing parsing step from extractor
m-cheung Jun 8, 2017
f812b58
Removing unused variables and fixing comment styling
m-cheung Jun 8, 2017
ff6ad90
Fixing description for HTMLFileContents
m-cheung Jun 8, 2017
c5fb6fb
Removing requirement for HTTPRequest in HTMLFileContents
m-cheung Jun 8, 2017
c736c28
Allowing for nested excluded tags
m-cheung Jun 9, 2017
af4cd9d
Adding support for nested exclude tags
m-cheung Jun 9, 2017
20932a6
Adding Future message with regards to nested pre tags
m-cheung Jun 13, 2017
5f93bbb
Fixing minor formatting problem
m-cheung Jun 13, 2017
30df62a
Additional documentation in IDLFragmentExtractor and HTMLFileContents
m-cheung Jun 14, 2017
7c3d78e
Adding additional documentation to IDLFragmentExtractor
m-cheung Jun 14, 2017
6865625
Formatting and style fixes on HTMLFileContents and IDLFragmentExtractor
m-cheung Jun 14, 2017
2dc37e8
Minor changes to HTMLFileContents and removing unneeded test
m-cheung Jun 26, 2017
c93dec6
Addressing formatting changes in HTMLFileContents
m-cheung Jun 27, 2017
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions config/files.js
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,10 @@
`lib${sep}org${sep}chromium${sep}webidl${sep}ast${sep}Stringifier.js`,
`lib${sep}org${sep}chromium${sep}webidl${sep}ast${sep}Float.js`,

// HTML File Content and Extractor
`lib${sep}org${sep}chromium${sep}webidl${sep}HTMLFileContents.js`,
`lib${sep}org${sep}chromium${sep}webidl${sep}IDLFragmentExtractor.js`,

// Web IDL parsers
`lib${sep}org${sep}chromium${sep}webidl${sep}BaseParser.js`,
`lib${sep}org${sep}chromium${sep}webidl${sep}Parser.js`,
Expand Down
36 changes: 36 additions & 0 deletions lib/org/chromium/webidl/HTMLFileContents.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
// Copyright 2017 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
'use strict';

// Declares the FOAM class org.chromium.webidl.HTMLFileContents: a plain
// record describing one HTML file and carrying its raw contents.
foam.CLASS({
package: 'org.chromium.webidl',
name: 'HTMLFileContents',

documentation: 'An HTML file that stores the raw content of the response.',

// NOTE(review): presumably url + timestamp together act as the composite
// identifier of an instance; confirm against FOAM's `ids` semantics.
ids: ['url', 'timestamp'],

properties: [
{
// Location of the file; marked as required.
class: 'String',
name: 'url',
required: true,
},
{
// Date associated with this copy of the file; marked as required.
// NOTE(review): presumably the fetch time; confirm with the producer.
class: 'Date',
name: 'timestamp',
required: true,
},
{
// The raw file contents as a single string.
class: 'String',
name: 'contents',
},
{
// Array of strings; see the `documentation` entry of this property below.
class: 'Array',
of: 'String',
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

documentation?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nits: usually in order: class, of, documentation, name. Please uncomment documentation.

documentation: 'Any array of files referencing the URL of this file.',
name: 'references',
},
],
});
1 change: 0 additions & 1 deletion lib/org/chromium/webidl/IDLFileContents.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ foam.CLASS({
requires: [
'foam.core.Property',
'foam.net.HTTPrequest',
'org.chromium.webidl.IDLFileBase',
],

properties: [
Expand Down
116 changes: 116 additions & 0 deletions lib/org/chromium/webidl/IDLFragmentExtractor.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
// Copyright 2017 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
'use strict';

foam.CLASS({
  package: 'org.chromium.webidl',
  name: 'IDLFragmentExtractor',
  documentation: 'Extracts IDL Fragments from HTML files.',
  requires: [
    'foam.parsers.html.HTMLLexer',
    'foam.parsers.html.TagType',
  ],

  properties: [
    {
      // The class is named HTMLFileContents (plural); extract() reads its
      // `contents` string.
      of: 'org.chromium.webidl.HTMLFileContents',
      documentation: 'The HTML file from which IDL fragments are extracted.',
      name: 'file',
      // Re-run the extraction every time a new file is assigned.
      postSet: function(_, file) {
        this.idlFragments = this.extract();
      },
    },
    {
      documentation: 'All IDL fragments found in the parsed HTML file.',
      name: 'idlFragments',
    },
  ],

  methods: [
    /**
     * Lexes `this.file.contents` as HTML and collects the text found inside
     * every `<pre>` element whose class list contains `idl`, skipping any
     * such element nested inside an element whose class list contains
     * `example` or `note`.
     *
     * @return {Array<String>} One unescaped string per IDL fragment, in
     *     document order.
     * @throws {Error} When the lexer produces no value for the contents.
     */
    function extract() {
      var lexer = this.HTMLLexer.create();
      var OPEN = this.TagType.OPEN.name;
      var CLOSE = this.TagType.CLOSE.name;

      // HTML class lists are separated by ASCII whitespace, not only by a
      // single space (e.g. class="idl\n  extended" spans two lines).
      var CLASS_SEPARATOR = /[ \t\n\f\r]+/;

      // Returns every whitespace-separated token of each attribute on |node|
      // named |attrName|; an empty array if there is no such attribute.
      var extractAttr = function(node, attrName) {
        var retVal = [];
        node.attributes.forEach(function(attr) {
          if (attr.name === attrName) {
            retVal = retVal.concat(attr.value.split(CLASS_SEPARATOR));
          }
        });
        return retVal;
      };

      // True when the class list marks a non-normative (example/note) tag.
      // Hoisted out of the loop: it does not depend on the current item.
      var isExcluded = function(cls) {
        return cls && (cls.includes('example') || cls.includes('note'));
      };

      var results = lexer.parseString(this.file.contents).value;
      if (!results) throw new Error("IDL Parse was not successful.");

      var idlFragments = [];
      var tagStack = []; // Used for tag matching.
      var excludeStack = []; // Used for tracking excluded (example/note) tags.
      var tagMatching = true; // Set when not inside a pre tag of interest.
      var content = ''; // Used to group together related content.
      for (var i = 0; i < results.length; i++) {
        var item = results[i];
        var isTag = lexer.Tag.isInstance(item);

        // FUTURE: Handle nested pre tags.
        // As of this writing, no IDL fragments have been found within
        // nested pre tags.
        if (!tagMatching) {
          // Ignoring all tags. Only extracting text content within pre tags.
          if (isTag) {
            if (item.nodeName === 'pre') {
              // Any pre tag ends the fragment being collected.
              tagMatching = true;
              tagStack.pop();
              idlFragments.push(lexer.lib.unescapeString(content));
              content = '';
            }
          } else {
            // item is a text fragment, so we append it.
            content += item;
          }
        } else if (isTag) {
          // Encountered a tag and we are tag matching.
          // Perform appropriate action based on class attribute and tagType.
          var top = tagStack[tagStack.length - 1];
          var classes = extractAttr(item, 'class');
          var isIDL = classes && classes.includes('idl');

          if (item.type.name === OPEN) {
            if (isExcluded(classes)) {
              // Entering the body of an excluded tag.
              excludeStack.push(item);
            } else if (excludeStack.length === 0
                && item.nodeName === 'pre' && isIDL) {
              // Found a <pre class="idl.*">.
              // Ignore tags and only extract text now.
              tagMatching = false;
            }
            tagStack.push(item);
          } else if (top && item.type.name === CLOSE
              && top.nodeName === item.nodeName) {
            // Item is a close tag matching the tag at the top of the stack.
            // Aliasing for readability.
            var openTag = top;
            var closeTag = item;

            var openTagCls = extractAttr(openTag, 'class');
            var excludeStackTop = excludeStack[excludeStack.length - 1];
            // Guard against an empty exclude stack before reading nodeName.
            if (isExcluded(openTagCls) && excludeStackTop &&
                closeTag.nodeName === excludeStackTop.nodeName) {
              excludeStack.pop();
            }
            tagStack.pop();
          } else {
            // Mismatched close tags and OPEN_CLOSE tags are ignored.
          }
        }
      }
      return idlFragments;
    },
  ],
});

25 changes: 25 additions & 0 deletions test/node/parsing/Console/2
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
[Exposed=(Window,Worker,Worklet)]
namespace console { // but see namespace object requirements below
// Logging
void assert(optional boolean condition = false, any... data);
void clear();
void count(optional DOMString label = "default");
void debug(any... data);
void error(any... data);
void info(any... data);
void log(any... data);
void table(any tabularData, optional sequence<DOMString> properties);
void trace(any... data);
void warn(any... data);
void dir(any item, optional object? options);
void dirxml(any... data);

// Grouping
void group(any... data);
void groupCollapsed(any... data);
void groupEnd();

// Timing
void time(optional DOMString label = "default");
void timeEnd(optional DOMString label = "default");
};
25 changes: 25 additions & 0 deletions test/node/parsing/Console/3
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
[Exposed=(Window,Worker,Worklet)]
namespace console { // but see namespace object requirements below
// Logging
void assert(optional boolean condition = false, any... data);
void clear();
void count(optional DOMString label = "default");
void debug(any... data);
void error(any... data);
void info(any... data);
void log(any... data);
void table(any tabularData, optional sequence<DOMString> properties);
void trace(any... data);
void warn(any... data);
void dir(any item, optional object? options);
void dirxml(any... data);

// Grouping
void group(any... data);
void groupCollapsed(any... data);
void groupEnd();

// Timing
void time(optional DOMString label = "default");
void timeEnd(optional DOMString label = "default");
};
Loading