GoogleChromeLabs · m-cheung · Jun 27, 2017 · May 23, 2017 · May 29, 2017 · May 29, 2017
diff --git a/config/files.js b/config/files.js
@@ -90,6 +90,10 @@
     `lib${sep}org${sep}chromium${sep}webidl${sep}ast${sep}Stringifier.js`,
     `lib${sep}org${sep}chromium${sep}webidl${sep}ast${sep}Float.js`,
 
+    // HTML File Content and Extractor
+    `lib${sep}org${sep}chromium${sep}webidl${sep}HTMLFileContents.js`,
+    `lib${sep}org${sep}chromium${sep}webidl${sep}IDLFragmentExtractor.js`,
+
     // Web IDL parsers
     `lib${sep}org${sep}chromium${sep}webidl${sep}BaseParser.js`,
     `lib${sep}org${sep}chromium${sep}webidl${sep}Parser.js`,

diff --git a/lib/org/chromium/webidl/HTMLFileContents.js b/lib/org/chromium/webidl/HTMLFileContents.js
@@ -0,0 +1,33 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+'use strict';
+
+/**
+ * Record pairing a URL and a timestamp with a string of file content.
+ *
+ * Instances are identified by the (url, timestamp) pair, and every property
+ * is declared final.
+ */
+foam.CLASS({
+  package: 'org.chromium.webidl',
+  name: 'HTMLFileContents',
+
+  documentation: 'An HTML file that stores its contents.',
+
+  // Composite identifier: url plus timestamp.
+  ids: ['url', 'timestamp'],
+
+  properties: [
+    {
+      // First half of the id.
+      class: 'String',
+      name: 'url',
+      required: true,
+      final: true,
+    },
+    {
+      // Second half of the id.
+      class: 'Date',
+      name: 'timestamp',
+      required: true,
+      final: true,
+    },
+    {
+      // File text held as a single string; not required.
+      class: 'String',
+      name: 'content',
+      final: true,
+    },
+  ],
+});
diff --git a/lib/org/chromium/webidl/IDLFileContents.js b/lib/org/chromium/webidl/IDLFileContents.js
@@ -12,7 +12,6 @@ foam.CLASS({
   requires: [
     'foam.core.Property',
     'foam.net.HTTPrequest',
-    'org.chromium.webidl.IDLFileBase',
   ],
 
   properties: [

diff --git a/lib/org/chromium/webidl/IDLFragmentExtractor.js b/lib/org/chromium/webidl/IDLFragmentExtractor.js
@@ -0,0 +1,101 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+'use strict';
+
+/**
+ * Pulls Web IDL fragments out of an HTML document.
+ *
+ * Assigning `file` runs `extract()` and stores the result in `idlFragments`.
+ * A fragment is the text between an opening `pre` tag whose class list
+ * contains the token `idl` and the next `pre` tag. Such a tag is skipped when
+ * its own class list, or that of any element still open around it, contains
+ * the token `example` or `note`.
+ */
+foam.CLASS({
+  package: 'org.chromium.webidl',
+  name: 'IDLFragmentExtractor',
+  documentation: 'extracts IDL Fragments from HTML files',
+  requires: [
+    'foam.parsers.html.HTMLLexer'
+  ],
+
+  properties: [
+    {
+      // Object whose `content` string is lexed by extract().
+      name: 'file',
+      of: 'org.chromium.webidl.HTMLFileContents',
+      postSet: function(_, file) {
+        // Re-run the extraction every time a file is assigned.
+        this.idlFragments = this.extract();
+      },
+    },
+    {
+      // Array of fragment strings produced by the latest extract() call
+      // triggered through `file`.
+      name: 'idlFragments',
+    },
+  ],
+
+  methods: [
+    /**
+     * Lexes `this.file.content` and collects the text of every non-excluded
+     * `<pre class="idl ...">` element.
+     *
+     * @return {Array} Unescaped fragment strings in document order; empty
+     *     when no file has been assigned.
+     */
+    function extract() {
+      var self = this;
+
+      // Nothing to scan until a file has been assigned.
+      if (!self.file) return [];
+
+      var lexer = self.HTMLLexer.create();
+      var OPEN = lexer.TagType.OPEN.name;
+      var CLOSE = lexer.TagType.CLOSE.name;
+
+      // Returns the space-separated tokens of the first attribute called
+      // attrName on node, or undefined when node has no such attribute.
+      var extractAttr = function(node, attrName) {
+        var retVal;
+        node.attributes.some(function(attr) {
+          if (attr.name !== attrName) return false;
+          retVal = attr.value.split(' ');
+          // Stop at the first match instead of scanning the remainder.
+          return true;
+        });
+        return retVal;
+      };
+
+      // Truthy when a class token list carries `example` or `note`.
+      var isExcluded = function(cls) {
+        return cls && (cls.includes('example') || cls.includes('note'));
+      };
+
+      var results = lexer.parseString(self.file.content).value;
+
+      var idlFragments = [];
+      var tagStack = [];       // Open tags awaiting their matching close tag.
+      var excludeStack = [];   // Open tags carrying an example/note class.
+      var tagMatching = true;  // False while inside a pre tag of interest.
+      var content = '';        // Text gathered for the current fragment.
+      for (var i = 0; i < results.length; i++) {
+        var item = results[i];
+        var isTag = lexer.Tag.isInstance(item);
+
+        // FUTURE: Handle nested pre tags.
+        // As of this writing, no IDL fragments have been found within
+        // nested pre tags.
+        if (!tagMatching) {
+          // Inside an IDL pre: keep text, drop tags, stop at the next pre tag.
+          if (isTag && item.nodeName === 'pre') {
+            tagMatching = true;
+            tagStack.pop();
+            idlFragments.push(lexer.lib.unescapeString(content));
+            content = '';
+          } else if (!isTag) {
+            content += item;
+          }
+          continue;
+        }
+
+        // Text outside an IDL pre is of no interest.
+        if (!isTag) continue;
+
+        var top = tagStack[tagStack.length - 1];
+        if (item.type.name === OPEN) {
+          var classes = extractAttr(item, 'class');
+          if (isExcluded(classes)) {
+            excludeStack.push(item);
+          } else if (excludeStack.length === 0 && item.nodeName === 'pre' &&
+              classes && classes.includes('idl')) {
+            // Found a <pre class="idl.*">.
+            // Ignore tags and only extract text now.
+            tagMatching = false;
+          }
+          tagStack.push(item);
+        } else if (top && item.type.name === CLOSE &&
+            top.nodeName === item.nodeName) {
+          // Every excluded tag is pushed on both stacks in the same order,
+          // so an excluded `top` is necessarily the top of excludeStack too.
+          if (isExcluded(extractAttr(top, 'class'))) {
+            excludeStack.pop();
+          }
+          tagStack.pop();
+        }
+        // Mismatched close tags and OPEN_CLOSE tags are ignored.
+      }
+      return idlFragments;
+    }
+  ]
+});
+
diff --git a/test/any/htmlFileClasses-test.js b/test/any/htmlFileClasses-test.js
@@ -0,0 +1,51 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+'use strict';
+
+// Specs for org.chromium.webidl.HTMLFileContents: values passed to create()
+// are readable afterwards, and assigning to any property later throws.
+describe('HTML file classes', function() {
+  var HTMLFileContents;
+
+  beforeEach(function() {
+    HTMLFileContents = foam.lookup('org.chromium.webidl.HTMLFileContents');
+  });
+
+  it('should fetch some content and properly set the timestamp', function() {
+    var url = 'http://someTest.url/index.html';
+    var content = '<html></html>';
+    var date = new Date();
+    // Hand over the very Date asserted on below. A second `new Date()` can
+    // land on a later millisecond and make the comparison fail.
+    var file = HTMLFileContents.create({
+      url: url,
+      timestamp: date,
+      content: content
+    });
+
+    // Verify properties are as set
+    expect(file.url).toBe(url);
+    expect(file.timestamp.getTime()).toBe(date.getTime());
+    expect(file.content).toBe(content);
+  });
+
+  it('should fail to set HTMLFileContent props after creation', function() {
+    var initUrl = 'http://someTest.url/index.html';
+    var newUrl = 'http://someOther.url/index.html';
+    var initContent = '<html></html>';
+    var newContent = '<html>Potato</html>';
+    var origDate = new Date(0);
+    var newDate = new Date();
+
+    var file = HTMLFileContents.create({
+      url: initUrl,
+      timestamp: origDate,
+      content: initContent
+    });
+
+    // Each assignment must throw and leave the original value in place.
+    expect(function() { file.url = newUrl; }).toThrow();
+    expect(file.url).toBe(initUrl);
+    expect(function() { file.content = newContent; }).toThrow();
+    expect(file.content).toBe(initContent);
+    expect(function() { file.timestamp = newDate; }).toThrow();
+    expect(file.timestamp.getTime()).toBe(origDate.getTime());
+  });
+});
diff --git a/test/node/parsing/Console/2 b/test/node/parsing/Console/2
@@ -0,0 +1,25 @@
+[Exposed=(Window,Worker,Worklet)]
+namespace console { // but see namespace object requirements below
+  // Logging
+  void assert(optional boolean condition = false, any... data);
+  void clear();
+  void count(optional DOMString label = "default");
+  void debug(any... data);
+  void error(any... data);
+  void info(any... data);
+  void log(any... data);
+  void table(any tabularData, optional sequence<DOMString> properties);
+  void trace(any... data);
+  void warn(any... data);
+  void dir(any item, optional object? options);
+  void dirxml(any... data);
+
+  // Grouping
+  void group(any... data);
+  void groupCollapsed(any... data);
+  void groupEnd();
+
+  // Timing
+  void time(optional DOMString label = "default");
+  void timeEnd(optional DOMString label = "default");
+};
diff --git a/test/node/parsing/Console/3 b/test/node/parsing/Console/3
@@ -0,0 +1,25 @@
+[Exposed=(Window,Worker,Worklet)]
+namespace console { // but see namespace object requirements below
+  // Logging
+  void assert(optional boolean condition = false, any... data);
+  void clear();
+  void count(optional DOMString label = "default");
+  void debug(any... data);
+  void error(any... data);
+  void info(any... data);
+  void log(any... data);
+  void table(any tabularData, optional sequence<DOMString> properties);
+  void trace(any... data);
+  void warn(any... data);
+  void dir(any item, optional object? options);
+  void dirxml(any... data);
+
+  // Grouping
+  void group(any... data);
+  void groupCollapsed(any... data);
+  void groupEnd();
+
+  // Timing
+  void time(optional DOMString label = "default");
+  void timeEnd(optional DOMString label = "default");
+};