-
Notifications
You must be signed in to change notification settings - Fork 13
HTMLFileContent and component for extracting IDL fragments #46
Changes from 21 commits
dd775b5
58e5e48
32366c5
6376b3d
959d608
b35f750
853d12d
0f92a6b
b640239
e9eb44d
5bcfc90
a041087
5969d4b
ad7669e
dd2bd1d
f812b58
ff6ad90
c5fb6fb
c736c28
af4cd9d
20932a6
5f93bbb
30df62a
7c3d78e
6865625
2dc37e8
c93dec6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
// Copyright 2017 The Chromium Authors. All rights reserved. | ||
// Use of this source code is governed by a BSD-style license that can be | ||
// found in the LICENSE file. | ||
'use strict'; | ||
|
||
foam.CLASS({ | ||
package: 'org.chromium.webidl', | ||
name: 'HTMLFileContents', | ||
|
||
documentation: 'An HTML file that stores it contents.', | ||
|
||
ids: ['url', 'timestamp'], | ||
|
||
properties: [ | ||
{ | ||
class: 'String', | ||
name: 'url', | ||
required: true, | ||
final: true | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: add trailing comma |
||
}, | ||
{ | ||
class: 'Date', | ||
name: 'timestamp', | ||
required: true, | ||
final: true, | ||
}, | ||
{ | ||
class: 'String', | ||
name: 'content', | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
final: true, | ||
}, | ||
], | ||
}); |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
// Copyright 2017 The Chromium Authors. All rights reserved. | ||
// Use of this source code is governed by a BSD-style license that can be | ||
// found in the LICENSE file. | ||
'use strict'; | ||
|
||
foam.CLASS({ | ||
package: 'org.chromium.webidl', | ||
name: 'IDLFragmentExtractor', | ||
documentation: 'extracts IDL Fragments from HTML files', | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: |
||
requires: [ | ||
'foam.parsers.html.HTMLLexer' | ||
], | ||
|
||
properties: [ | ||
{ | ||
name: 'file', | ||
of: 'org.chromium.webidl.HTMLFileContent', | ||
postSet: function(_, file) { | ||
this.idlFragments = this.extract(); | ||
}, | ||
}, | ||
{ | ||
name: 'idlFragments', | ||
// description: 'all idlFragments found in the parsed HTML file', | ||
}, | ||
], | ||
|
||
methods: [ | ||
function extract() { | ||
var self = this; | ||
var lexer = self.HTMLLexer.create(); | ||
var OPEN = lexer.TagType.OPEN.name; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here and elsewhere ( |
||
var CLOSE = lexer.TagType.CLOSE.name; | ||
var extractAttr = function(node, attrName) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it's legitimate to have: <node-name attr="value1
value2" attr="value3"> to yield I assume HTMLLexer doesn't collapse whitespace, so I think we need to revise this. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I have made minor changes to the extract code which allows for this. It will be part of the next set of changes. |
||
var retVal; | ||
node.attributes.some(function(attr) { | ||
if (attr.name === attrName) { | ||
retVal = attr.value.split(' '); | ||
} | ||
}); | ||
return retVal; | ||
}; | ||
|
||
var results = lexer.parseString(self.file.content).value; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should probably throw something intentional on incomplete or failed parse. |
||
|
||
var idlFragments = []; | ||
var tagStack = []; // Used for tag matching. | ||
var excludeStack = []; // Used for tracking excluded (example/note) tags. | ||
var tagMatching = true; // Set when not inside a pre tag of interest. | ||
var content = ''; // Used to group together related content. | ||
for (var i = 0; i < results.length; i++) { | ||
var item = results[i]; | ||
var isTag = lexer.Tag.isInstance(item); | ||
|
||
// FUTURE: Handle nested pre tags. | ||
// As of this writing, no IDL fragments have been found | ||
// within nested pre tags. | ||
if (!tagMatching) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Clearer IMHO: |
||
// Ignoring all tags. Only extracting text within pre tags. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I like this comment. Can we get a comment at the top of each |
||
if (isTag && item.nodeName === 'pre') { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does this need to handle nested pre tags? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is hard to say at this point whether nested We could attempt to put the content through another round of processing or implement a proper HTML parser (which was my first attempt at this problem, but was scrapped since it did a lot more than it needed to and likely had other issues too). |
||
tagMatching = true; | ||
tagStack.pop(); | ||
idlFragments.push(lexer.lib.unescapeString(content)); | ||
content = ''; | ||
} else { | ||
content += isTag ? '' : item; | ||
} | ||
} else if (isTag) { | ||
var top = tagStack[tagStack.length - 1]; | ||
var classes = extractAttr(item, 'class'); | ||
var isIDL = classes && classes.includes('idl'); | ||
var isExcluded = function(cls) { | ||
return cls && (cls.includes('example') || cls.includes('note')); | ||
}; | ||
|
||
if (item.type.name === OPEN) { | ||
if (isExcluded(classes)) { | ||
excludeStack.push(item); | ||
} else if (excludeStack.length === 0 && item.nodeName === 'pre' && isIDL) { | ||
// Found a <pre class="idl.*">. | ||
// Ignore tags and only extract text now. | ||
tagMatching = false; | ||
} | ||
tagStack.push(item); | ||
} else if (top && item.type.name === CLOSE && top.nodeName === item.nodeName) { | ||
var parentCls = extractAttr(top, 'class'); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This isn't var openTag = top;
var closeTag = item and |
||
var excludeStackTop = excludeStack[excludeStack.length - 1]; | ||
if (isExcluded(parentCls) && item.nodeName === excludeStackTop.nodeName) { | ||
excludeStack.pop(); | ||
} | ||
tagStack.pop(); | ||
} else { | ||
// Mismatched close tags and OPEN_CLOSE tags are ignored. | ||
} | ||
} | ||
} | ||
return idlFragments; | ||
} | ||
] | ||
}); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
// Copyright 2017 The Chromium Authors. All rights reserved. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. file name: we just test |
||
// Use of this source code is governed by a BSD-style license that can be | ||
// found in the LICENSE file. | ||
'use strict'; | ||
|
||
describe('HTML file classes', function() { | ||
var HTMLFileContents; | ||
|
||
beforeEach(function() { | ||
HTMLFileContents = foam.lookup('org.chromium.webidl.HTMLFileContents'); | ||
}); | ||
|
||
it('should fetch some content and properly set the timestamp', function() { | ||
var url = 'http://someTest.url/index.html'; | ||
var content = '<html></html>'; | ||
var date = new Date(); | ||
var file = HTMLFileContents.create({ | ||
url: url, | ||
timestamp: new Date(), | ||
content: content | ||
}); | ||
|
||
// Verify properties are as set | ||
expect(file.url).toBe(url); | ||
expect(file.timestamp.getTime()).toBe(date.getTime()); | ||
expect(file.content).toBe(content); | ||
}); | ||
|
||
it('should fail to set HTMLFileContent props after creation', function() { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should probably nix this due to how |
||
var initUrl = 'http://someTest.url/index.html'; | ||
var newUrl = 'http://someOther.url/index.html'; | ||
var initContent = '<html></html>'; | ||
var newContent = '<html>Potato</html>'; | ||
var origDate = new Date(0); | ||
var newDate = new Date(); | ||
|
||
var file = HTMLFileContents.create({ | ||
url: initUrl, | ||
timestamp: origDate, | ||
content: initContent | ||
}); | ||
|
||
// On set, they should fail | ||
expect(function() { file.url = newUrl; }).toThrow(); | ||
expect(file.url).toBe(initUrl); | ||
expect(function() { file.content = newContent; }).toThrow(); | ||
expect(file.content).toBe(initContent); | ||
expect(function() { file.timestamp = newDate; }).toThrow(); | ||
expect(file.timestamp.getTime()).toBe(origDate.getTime()); | ||
}); | ||
}); |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
[Exposed=(Window,Worker,Worklet)] | ||
namespace console { // but see namespace object requirements below | ||
// Logging | ||
void assert(optional boolean condition = false, any... data); | ||
void clear(); | ||
void count(optional DOMString label = "default"); | ||
void debug(any... data); | ||
void error(any... data); | ||
void info(any... data); | ||
void log(any... data); | ||
void table(any tabularData, optional sequence<DOMString> properties); | ||
void trace(any... data); | ||
void warn(any... data); | ||
void dir(any item, optional object? options); | ||
void dirxml(any... data); | ||
|
||
// Grouping | ||
void group(any... data); | ||
void groupCollapsed(any... data); | ||
void groupEnd(); | ||
|
||
// Timing | ||
void time(optional DOMString label = "default"); | ||
void timeEnd(optional DOMString label = "default"); | ||
}; |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
[Exposed=(Window,Worker,Worklet)] | ||
namespace console { // but see namespace object requirements below | ||
// Logging | ||
void assert(optional boolean condition = false, any... data); | ||
void clear(); | ||
void count(optional DOMString label = "default"); | ||
void debug(any... data); | ||
void error(any... data); | ||
void info(any... data); | ||
void log(any... data); | ||
void table(any tabularData, optional sequence<DOMString> properties); | ||
void trace(any... data); | ||
void warn(any... data); | ||
void dir(any item, optional object? options); | ||
void dirxml(any... data); | ||
|
||
// Grouping | ||
void group(any... data); | ||
void groupCollapsed(any... data); | ||
void groupEnd(); | ||
|
||
// Timing | ||
void time(optional DOMString label = "default"); | ||
void timeEnd(optional DOMString label = "default"); | ||
}; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
More docs: Is the `HTMLFileContents.contents` pre-processed in any way (e.g., `&foo;`-escaped), or is it the raw request body?