From 11c8da766587fb2532542a1e9a73cc2bb3356556 Mon Sep 17 00:00:00 2001 From: Alexei Date: Sun, 21 Oct 2018 16:42:11 -0400 Subject: [PATCH] Fix tab URL misattribution in learning listeners. By checking the request's document URL when available. --- src/js/heuristicblocking.js | 8 +++++ src/js/utils.js | 62 ++++++++++++++++++++++++++++++++++- src/js/webrequest.js | 65 ++----------------------------------- 3 files changed, 72 insertions(+), 63 deletions(-) diff --git a/src/js/heuristicblocking.js b/src/js/heuristicblocking.js index 8ed75446b6..0dd3d6a773 100644 --- a/src/js/heuristicblocking.js +++ b/src/js/heuristicblocking.js @@ -114,6 +114,14 @@ HeuristicBlocker.prototype = { let tab_origin = tabOrigins[details.tabId]; + // we may no longer be on the page the request is coming from + const request_doc_host = utils.getDocumentHostForRequest(details), + request_doc_origin = window.getBaseDomain(request_doc_host), + misattribution = request_doc_origin && request_doc_origin != tab_origin; + if (misattribution) { + tab_origin = request_doc_origin; + } + // ignore first-party requests if (!tab_origin || request_origin == tab_origin) { return {}; diff --git a/src/js/utils.js b/src/js/utils.js index 825ac240af..906f4423ff 100644 --- a/src/js/utils.js +++ b/src/js/utils.js @@ -344,15 +344,75 @@ function getHostFromDomainInput(input) { return uri.host; } +/** + * Gets the hostname for a given request's top-level document. + * + * The request's document may be different from the current top-level document + * loaded in tab as requests can come out of order: + * + * - "main_frame" requests usually but not always mark a boundary + * (navigating to another site while the current page is still loading) + * - sometimes there is no "main_frame" request + * (service worker pages in Firefox) + * + * @param {Object} details chrome.webRequest request details object + * + * @return {String} the hostname for the request's top-level document + */ +function getDocumentHostForRequest(details) { + let host, url; + + // Firefox 54+ + if (details.hasOwnProperty("documentUrl")) { + if (details.type == "main_frame") { + // the top-level document itself + url = details.url; + } else if (details.hasOwnProperty("frameAncestors")) { + // Firefox 58+ + if (details.frameAncestors.length) { + // inside a frame + url = details.frameAncestors[details.frameAncestors.length - 1].url; + } else { + // inside the top-level document + url = details.documentUrl; + } + } else { + // TODO Firefox 54-57 or a service worker request + if (details.documentUrl.endsWith("/sw.js")) { + url = details.documentUrl; + } + } + + // Chrome 63+ + } else if (details.hasOwnProperty("initiator")) { + if (details.initiator && details.initiator != "null") { + if (details.type == "main_frame") { + url = details.url; + } else if (details.parentFrameId == -1 || details.type == "sub_frame" && details.parentFrameId === 0) { + // TODO can only rely on initiator for main frame resources: + // https://crbug.com/838242#c17 + url = details.initiator + '/'; + } + } + } + + if (url) { + host = window.extractHostFromURL(url); + } + + return host; +} + /************************************** exports */ var exports = { arrayBufferToBase64, estimateMaxEntropy, explodeSubdomains, + getDocumentHostForRequest, getHostFromDomainInput, nDaysFromNow, - oneDayFromNow, oneDay, + oneDayFromNow, oneHour, oneMinute, oneSecond, diff --git a/src/js/webrequest.js b/src/js/webrequest.js index fd532f4005..4c1b0fe642 100644 --- a/src/js/webrequest.js +++ b/src/js/webrequest.js @@ -80,7 +80,7 @@ function onBeforeRequest(details) { // if we are no longer on the page the request is coming for, // don't log in popup or attempt to replace social widgets // but do block request/modify headers - const request_doc_host = getDocumentHostForRequest(details), + const request_doc_host = utils.getDocumentHostForRequest(details), misattribution = request_doc_host && request_doc_host != tab_host; if (misattribution) { tab_host = request_doc_host; @@ -178,7 +178,7 @@ function onBeforeSendHeaders(details) { // if we are no longer on the page the request is coming for, // don't log in popup or attempt to replace social widgets // but do block request/modify headers - const request_doc_host = getDocumentHostForRequest(details), + const request_doc_host = utils.getDocumentHostForRequest(details), misattribution = request_doc_host && request_doc_host != tab_host; if (misattribution) { tab_host = request_doc_host; @@ -308,7 +308,7 @@ function onHeadersReceived(details) { // if we are no longer on the page the request is coming for, // don't log in popup or attempt to replace social widgets // but do block request/modify headers - const request_doc_host = getDocumentHostForRequest(details), + const request_doc_host = utils.getDocumentHostForRequest(details), misattribution = request_doc_host && request_doc_host != tab_host; if (misattribution) { tab_host = request_doc_host; @@ -381,65 +381,6 @@ function isThirdPartyDomain(domain1, domain2) { return false; } -/** - * Gets the hostname for a given request's top-level document. - * - * The request's document may be different from the current top-level document - * loaded in tab as requests can come out of order: - * - * - "main_frame" requests usually but not always mark a boundary - * (navigating to another site while the current page is still loading) - * - sometimes there is no "main_frame" request - * (service worker pages in Firefox) - * - * @param {Object} details chrome.webRequest request details object - * - * @return {String} the hostname for the request's top-level document - */ -function getDocumentHostForRequest(details) { - let host, url; - - // Firefox 54+ - if (details.hasOwnProperty("documentUrl")) { - if (details.type == "main_frame") { - // the top-level document itself - url = details.url; - } else if (details.hasOwnProperty("frameAncestors")) { - // Firefox 58+ - if (details.frameAncestors.length) { - // inside a frame - url = details.frameAncestors[details.frameAncestors.length - 1].url; - } else { - // inside the top-level document - url = details.documentUrl; - } - } else { - // TODO Firefox 54-57 or a service worker request - if (details.documentUrl.endsWith("/sw.js")) { - url = details.documentUrl; - } - } - - // Chrome 63+ - } else if (details.hasOwnProperty("initiator")) { - if (details.initiator && details.initiator != "null") { - if (details.type == "main_frame") { - url = details.url; - } else if (details.parentFrameId == -1 || details.type == "sub_frame" && details.parentFrameId === 0) { - // TODO can only rely on initiator for main frame resources: - // https://crbug.com/838242#c17 - url = details.initiator + '/'; - } - } - } - - if (url) { - host = window.extractHostFromURL(url); - } - - return host; -} - /** * Gets the host name for a given tab id * @param {Integer} tabId chrome tab id