GoogleChrome · brendankenny · Nov 8, 2017 · Oct 19, 2017 · Oct 22, 2017 · Oct 26, 2017
diff --git a/lighthouse-cli/test/fixtures/seo/seo-failure-cases.html b/lighthouse-cli/test/fixtures/seo/seo-failure-cases.html
@@ -11,6 +11,7 @@
   <meta charset="utf-8">
   <meta name="viewport" content="invalid-content=should_have_looked_it_up">
   <!-- no <meta name="description" content=""> -->
+  <meta name="robots" content="nofollow, NOINDEX, all">
 </head>
 <body>
   <h1>SEO</h1>

diff --git a/lighthouse-cli/test/fixtures/static-server.js b/lighthouse-cli/test/fixtures/static-server.js
@@ -12,6 +12,7 @@ const path = require('path');
 const fs = require('fs');
 const parseQueryString = require('querystring').parse;
 const parseURL = require('url').parse;
+const HEADER_SAFELIST = new Set(['x-robots-tag']);
 
 const lhRootDirPath = path.join(__dirname, '../../../');
 
@@ -51,13 +52,14 @@ function requestHandler(request, response) {
   }
 
   function sendResponse(statusCode, data) {
-    let headers;
+    const headers = {};
+
     if (filePath.endsWith('.js')) {
-      headers = {'Content-Type': 'text/javascript'};
+      headers['Content-Type'] = 'text/javascript';
     } else if (filePath.endsWith('.css')) {
-      headers = {'Content-Type': 'text/css'};
+      headers['Content-Type'] = 'text/css';
     } else if (filePath.endsWith('.svg')) {
-      headers = {'Content-Type': 'image/svg+xml'};
+      headers['Content-Type'] = 'image/svg+xml';
     }
 
     let delay = 0;
@@ -72,6 +74,19 @@ function requestHandler(request, response) {
         delay = parseInt(queryString.delay, 10) || 2000;
       }
 
+      if (typeof queryString.extra_header !== 'undefined') {
+        let extraHeaders = queryString.extra_header;
+        extraHeaders = Array.isArray(extraHeaders) ? extraHeaders : [extraHeaders];
+        // Each entry has the form "name:value"; the value may itself contain colons.
+        extraHeaders.forEach(header => {
+          const [headerName, ...headerValue] = header.split(':');
+          // Names are matched case-insensitively; anything not in HEADER_SAFELIST is ignored.
+          if (HEADER_SAFELIST.has(headerName.toLowerCase())) {
+            headers[headerName] = headerValue.join(':');
+          }
+        });
+      }
+
       // redirect url to new url if present
       if (typeof queryString.redirect !== 'undefined') {
         return setTimeout(sendRedirect, delay, queryString.redirect);

diff --git a/lighthouse-cli/test/smokehouse/seo/expectations.js b/lighthouse-cli/test/smokehouse/seo/expectations.js
@@ -28,11 +28,14 @@ module.exports = [
       'link-text': {
         score: true,
       },
+      'is-crawlable': {
+        score: true,
+      },
     },
   },
   {
-    initialUrl: 'http://localhost:10200/seo/seo-failure-cases.html?status_code=403',
-    url: 'http://localhost:10200/seo/seo-failure-cases.html?status_code=403',
+    initialUrl: 'http://localhost:10200/seo/seo-failure-cases.html?status_code=403&extra_header=x-robots-tag:none',
+    url: 'http://localhost:10200/seo/seo-failure-cases.html?status_code=403&extra_header=x-robots-tag:none',
     audits: {
       'viewport': {
         score: false,
@@ -61,6 +64,14 @@ module.exports = [
           },
         },
       },
+      'is-crawlable': {
+        score: false,
+        details: {
+          items: {
+            length: 2,
+          },
+        },
+      },
     },
   },
 ];
diff --git a/lighthouse-core/audits/seo/is-crawlable.js b/lighthouse-core/audits/seo/is-crawlable.js
@@ -0,0 +1,112 @@
+/**
+ * @license Copyright 2017 Google Inc. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
+ */
+'use strict';
+
+const Audit = require('../audit');
+const BLOCKLIST = new Set([
+  'noindex',
+  'none',
+]);
+const ROBOTS_HEADER = 'x-robots-tag';
+const UNAVAILABLE_AFTER = 'unavailable_after';
+
+/**
+ * Tells whether a directive is `unavailable_after:<date>` whose date has already passed.
+ * @param {string} directive a single directive, compared as-is (no case folding or trimming)
+ * @returns {boolean} true only when the name matches and the date parses to a past instant
+ */
+function isUnavailable(directive) {
+  // Only the first colon separates the name from the value; dates may contain colons.
+  const colonAt = directive.indexOf(':');
+  if (colonAt === -1 || directive.slice(0, colonAt) !== UNAVAILABLE_AFTER) {
+    return false;
+  }
+
+  const expiresAt = Date.parse(directive.slice(colonAt + 1));
+
+  return expiresAt < Date.now() && !isNaN(expiresAt);
+}
+
+/**
+ * Scans a comma-separated list of robots directives for one that prevents indexing.
+ * @param {string} directives
+ * @returns {boolean} true if any lowercased, trimmed entry is blocklisted or expired
+ */
+function hasBlockingDirective(directives) {
+  const normalized = directives.split(',').map(item => item.toLowerCase().trim());
+
+  return normalized.some(item => BLOCKLIST.has(item) || isUnavailable(item));
+}
+
+/**
+ * Detects a robots header value that starts with a user agent (e.g. `googlebot: noindex`).
+ * @param {string} directives
+ * @returns {boolean}
+ */
+function hasUserAgent(directives) {
+  // A leading `name:` with no comma before the colon is a crawler prefix (`googlebot:`,
+  // `googlebot-news:`, `otherbot:`, ...), unless the name is the `unavailable_after` directive.
+  const prefixMatch = /^([^,:]+):/.exec(directives);
+
+  return prefixMatch !== null && prefixMatch[1].toLowerCase() !== UNAVAILABLE_AFTER;
+}
+
+class IsCrawlable extends Audit {
+  /**
+   * @return {!AuditMeta} static audit metadata; lists every artifact `audit()` reads
+   */
+  static get meta() {
+    return {
+      name: 'is-crawlable',
+      description: 'Page isn’t blocked from indexing',
+      failureDescription: 'Page is blocked from indexing',
+      helpText: 'The "Robots" directives tell crawlers how your content should be indexed. ' +
+      '[Learn more](https://developers.google.com/search/reference/robots_meta_tag).',
+      requiredArtifacts: ['MetaRobots', 'devtoolsLogs'],
+    };
+  }
+
+  /**
+   * Fails when the robots meta tag or an unscoped X-Robots-Tag header blocks indexing.
+   * @param {!Artifacts} artifacts reads MetaRobots and the default pass' devtools log
+   * @return {!Promise<!AuditResult>} rawValue is true when no blocking directive was found
+   */
+  static audit(artifacts) {
+    return artifacts.requestMainResource(artifacts.devtoolsLogs[Audit.DEFAULT_PASS])
+      .then(mainResource => {
+        const blockingDirectives = [];
+
+        // Skip the meta tag check when the artifact is falsy (no robots meta content).
+        if (artifacts.MetaRobots && hasBlockingDirective(artifacts.MetaRobots)) {
+          blockingDirectives.push({
+            source: {
+              type: 'node',
+              snippet: `<meta name="robots" content="${artifacts.MetaRobots}" />`,
+            },
+          });
+        }
+
+        // A header prefixed with a user agent (e.g. `googlebot: noindex`) applies to that
+        // crawler only, so it is not reported as blocking the page.
+        mainResource.responseHeaders
+          .filter(h => h.name.toLowerCase() === ROBOTS_HEADER && !hasUserAgent(h.value) &&
+            hasBlockingDirective(h.value))
+          .forEach(h => blockingDirectives.push({source: `${h.name}: ${h.value}`}));
+
+        const headings = [
+          {key: 'source', itemType: 'code', text: 'Source'},
+        ];
+        const details = Audit.makeTableDetails(headings, blockingDirectives);
+
+        return {
+          rawValue: blockingDirectives.length === 0,
+          details,
+        };
+      });
+  }
+}
+
+module.exports = IsCrawlable;
diff --git a/lighthouse-core/config/default.js b/lighthouse-core/config/default.js
@@ -37,6 +37,8 @@ module.exports = {
       'dobetterweb/tags-blocking-first-paint',
       'dobetterweb/websql',
       'seo/meta-description',
+      'seo/crawlable-links',
+      'seo/meta-robots',
     ],
   },
   {
@@ -148,6 +150,8 @@ module.exports = {
     'dobetterweb/uses-passive-event-listeners',
     'seo/meta-description',
     'seo/http-status-code',
+    'seo/link-text',
+    'seo/is-crawlable',
   ],
 
   groups: {

diff --git a/lighthouse-core/config/seo.js b/lighthouse-core/config/seo.js
@@ -12,12 +12,14 @@ module.exports = {
     gatherers: [
       'seo/meta-description',
       'seo/crawlable-links',
+      'seo/meta-robots',
     ],
   }],
   audits: [
     'seo/meta-description',
     'seo/http-status-code',
     'seo/link-text',
+    'seo/is-crawlable',
   ],
   groups: {
     'seo-mobile': {
@@ -44,6 +46,7 @@ module.exports = {
         {id: 'meta-description', weight: 1, group: 'seo-content'},
         {id: 'http-status-code', weight: 1, group: 'seo-crawl'},
         {id: 'link-text', weight: 1, group: 'seo-content'},
+        {id: 'is-crawlable', weight: 1, group: 'seo-crawl'},
       ],
     },
   },

diff --git a/lighthouse-core/gather/gatherers/seo/meta-robots.js b/lighthouse-core/gather/gatherers/seo/meta-robots.js
@@ -0,0 +1,23 @@
+/**
+ * @license Copyright 2017 Google Inc. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
+ */
+'use strict';
+
+const Gatherer = require('../gatherer');
+
+class MetaRobots extends Gatherer {
+  /**
+   * @param {{driver: !Driver}} options Run options
+   * @return {!Promise<?string>} The value of the robots meta's content attribute, or null
+   */
+  afterPass(options) {
+    const driver = options.driver;
+    // The `i` flag makes the attribute value match case-insensitively (e.g. name="ROBOTS").
+    return driver.querySelector('head meta[name="robots" i]')
+      .then(node => node && node.getAttribute('content'));
+  }
+}
+
+module.exports = MetaRobots;