-
Notifications
You must be signed in to change notification settings - Fork 9.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
new_audit(blocked-from-indexing): page is blocked from indexing #3657
Changes from 7 commits
1336519
8274b33
5de29e8
4deffb8
0a85233
e46bbb6
d1c2667
77470fd
f3ca775
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -51,13 +51,14 @@ function requestHandler(request, response) { | |
} | ||
|
||
function sendResponse(statusCode, data) { | ||
let headers; | ||
const headers = {}; | ||
|
||
if (filePath.endsWith('.js')) { | ||
headers = {'Content-Type': 'text/javascript'}; | ||
headers['Content-Type'] = 'text/javascript'; | ||
} else if (filePath.endsWith('.css')) { | ||
headers = {'Content-Type': 'text/css'}; | ||
headers['Content-Type'] = 'text/css'; | ||
} else if (filePath.endsWith('.svg')) { | ||
headers = {'Content-Type': 'image/svg+xml'}; | ||
headers['Content-Type'] = 'image/svg+xml'; | ||
} | ||
|
||
let delay = 0; | ||
|
@@ -72,6 +73,16 @@ function requestHandler(request, response) { | |
delay = parseInt(queryString.delay, 10) || 2000; | ||
} | ||
|
||
if (typeof queryString.extra_header !== 'undefined') { | ||
let extraHeaders = queryString.extra_header; | ||
extraHeaders = Array.isArray(extraHeaders) ? extraHeaders : [extraHeaders]; | ||
|
||
extraHeaders.forEach(header => { | ||
const parts = header.split(':'); | ||
headers[parts[0]] = parts.slice(1).join(':'); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this might be complete overkill, but can we make a set of allowed headers and only add to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. One can't be too careful! |
||
}); | ||
} | ||
|
||
// redirect url to new url if present | ||
if (typeof queryString.redirect !== 'undefined') { | ||
return setTimeout(sendRedirect, delay, queryString.redirect); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
/** | ||
* @license Copyright 2017 Google Inc. All Rights Reserved. | ||
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 | ||
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. | ||
*/ | ||
'use strict'; | ||
|
||
const Audit = require('../audit'); | ||
const BLOCKLIST = new Set([ | ||
'noindex', | ||
'none', | ||
]); | ||
const ROBOTS_HEADER = 'x-robots-tag'; | ||
const UNAVAILABLE_AFTER = 'unavailable_after'; | ||
|
||
/** | ||
* Checks if given directive is a valid unavailable_after directive with a date in the past | ||
* @param {string} directive | ||
* @returns {boolean} | ||
*/ | ||
function isUnavailable(directive) { | ||
const parts = directive.split(':'); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sometimes I find it easier to use array deconstruction in cases like this: const [key, value] = directive.split(':'); There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, I agree that'd be much more elegant, but in this case it won't work: const [key, value] = 'unavailable_after: 12 Jun 2017 12:30:00'.split(':'); value in this case would be const [key, ...value] = 'unavailable_after: 12 Jun 2017 12:30:00'.split(':'); But then, There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. TIL about second parameter of const [key, value] = 'unavailable_after: 12 Jun 2017 12:30:00'.split(':', 1);
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah you're right, it doesn't do what I thought it would. Carry on! |
||
|
||
if (parts.length <= 1 || parts[0] !== UNAVAILABLE_AFTER) { | ||
return false; | ||
} | ||
|
||
const date = Date.parse(parts.slice(1).join(':')); | ||
|
||
return !isNaN(date) && date < Date.now(); | ||
} | ||
|
||
/**
 * Tells whether a comma-separated robots directive list contains at least one
 * directive that prevents indexing: either a BLOCKLIST entry or an
 * `unavailable_after` directive accepted by isUnavailable().
 * @param {string} directives
 * @returns {boolean}
 */
function hasBlockingDirective(directives) {
  for (const rawDirective of directives.split(',')) {
    // Directives are compared case-insensitively and without surrounding whitespace.
    const directive = rawDirective.toLowerCase().trim();

    if (BLOCKLIST.has(directive) || isUnavailable(directive)) {
      return true;
    }
  }

  return false;
}
|
||
/**
 * Returns true if robots header specifies user agent (e.g. `googlebot: noindex`).
 *
 * Only the text before the first `:` is inspected, and only when no `,`
 * precedes that colon, i.e. when the whole header value starts with a
 * `name:` prefix.
 * @param {string} directives
 * @returns {boolean}
 */
function hasUserAgent(directives) {
  const prefixMatch = directives.match(/^([^,:]+):/);

  if (!prefixMatch) {
    return false;
  }

  // Check if directives are prefixed with `googlebot:`, `googlebot-news:`, `otherbot:`, etc.
  // but ignore `unavailable_after:` which is a valid directive.
  // The prefix is trimmed first so that surrounding whitespace (e.g. a value
  // starting with a space) does not make `unavailable_after` look like a user agent.
  return prefixMatch[1].trim().toLowerCase() !== UNAVAILABLE_AFTER;
}
|
||
class IsCrawlable extends Audit { | ||
/** | ||
* @return {!AuditMeta} | ||
*/ | ||
static get meta() { | ||
return { | ||
name: 'is-crawlable', | ||
description: 'Page isn’t blocked from indexing', | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd prefer to word this more affirmatively (i.e. |
||
failureDescription: 'Page is blocked from indexing', | ||
helpText: 'The "Robots" directives tell crawlers how your content should be indexed. ' + | ||
'[Learn more](https://developers.google.com/search/reference/robots_meta_tag).', | ||
requiredArtifacts: ['MetaRobots'], | ||
}; | ||
} | ||
|
||
/** | ||
* @param {!Artifacts} artifacts | ||
* @return {!AuditResult} | ||
*/ | ||
static audit(artifacts) { | ||
return artifacts.requestMainResource(artifacts.devtoolsLogs[Audit.DEFAULT_PASS]) | ||
.then(mainResource => { | ||
const blockingDirectives = []; | ||
|
||
if (artifacts.MetaRobots) { | ||
const isBlocking = hasBlockingDirective(artifacts.MetaRobots); | ||
|
||
if (isBlocking) { | ||
blockingDirectives.push({ | ||
source: { | ||
type: 'node', | ||
snippet: `<meta name="robots" content="${artifacts.MetaRobots}" />`, | ||
}, | ||
}); | ||
} | ||
} | ||
|
||
mainResource.responseHeaders | ||
.filter(h => h.name.toLowerCase() === ROBOTS_HEADER && !hasUserAgent(h.value) && | ||
hasBlockingDirective(h.value)) | ||
.forEach(h => blockingDirectives.push({source: `${h.name}: ${h.value}`})); | ||
|
||
const headings = [ | ||
{key: 'source', itemType: 'code', text: 'Source'}, | ||
]; | ||
const details = Audit.makeTableDetails(headings, blockingDirectives); | ||
|
||
return { | ||
rawValue: blockingDirectives.length === 0, | ||
details, | ||
}; | ||
}); | ||
} | ||
} | ||
|
||
module.exports = IsCrawlable; |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
/** | ||
* @license Copyright 2017 Google Inc. All Rights Reserved. | ||
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 | ||
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. | ||
*/ | ||
'use strict'; | ||
|
||
const Gatherer = require('../gatherer'); | ||
|
||
class MetaRobots extends Gatherer {
  /**
   * Looks up the first `<meta name="robots">` element inside `<head>` and reads
   * its `content` attribute.
   * @param {{driver: !Driver}} options Run options
   * @return {!Promise<?string>} The value of the robots meta's content attribute,
   *     or the falsy query result (presumably null) when no such element is found
   */
  afterPass(options) {
    return options.driver
      .querySelector('head meta[name="robots"]')
      .then(node => node && node.getAttribute('content'));
  }
}
|
||
module.exports = MetaRobots; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I actually would prefer `const [key, ...value]`, but that's just down to preference. You could also do `header.split(/:(.+)/);`, which should give the correct split (captured groups also appear in the resulting array).