From 9b5a5f22decf166b71b08b2e8c7721f1852374fc Mon Sep 17 00:00:00 2001 From: Alexei Date: Wed, 6 Dec 2017 18:09:19 -0500 Subject: [PATCH 1/7] Check DNT for tracking domains only. Instead of checking most* third-party domains. This avoids creating action_map entries for non-tracking domains. [*] This also checks DNT for tracking domains found through localStorage or canvas fingerprinting. Previous logic checked cookie-based third-party domains only. --- src/js/heuristicblocking.js | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/js/heuristicblocking.js b/src/js/heuristicblocking.js index ea751153f3..e9212880cc 100644 --- a/src/js/heuristicblocking.js +++ b/src/js/heuristicblocking.js @@ -88,8 +88,6 @@ HeuristicBlocker.prototype = { /** * Wraps _recordPrevalence for use from webRequest listeners. * Also saves tab (page) origins. TODO Should be handled by tabData instead. - * Also sets a timeout for checking DNT policy for third-party FQDNs. - * TODO Does too much, should be broken up ... * * Called from performance-critical webRequest listeners! * Use updateTrackerPrevalence for non-webRequest initiated bookkeeping. @@ -121,10 +119,6 @@ HeuristicBlocker.prototype = { return {}; } - window.setTimeout(function () { - badger.checkForDNTPolicy(fqdn); - }, 10); - // abort if we already made a decision for this FQDN let action = this.storage.getAction(fqdn); if (action != constants.NO_TRACKING && action != constants.ALLOW) { @@ -182,6 +176,13 @@ HeuristicBlocker.prototype = { return; // We already know about the presence of this tracker on the given domain } + // Check this just-seen-tracking-on-this-site, + // not-yet-blocked domain for DNT policy. + // We check heuristically-blocked domains in webrequest.js. + window.setTimeout(function () { + badger.checkForDNTPolicy(tracker_fqdn); + }, 10); + // record that we've seen this tracker on this domain (in snitch map) firstParties.push(page_origin); snitch_map.setItem(tracker_origin, firstParties); From 88a0a432104afc626a9db0a1276469f45c7f4b8e Mon Sep 17 00:00:00 2001 From: Alexei Date: Wed, 6 Dec 2017 18:18:47 -0500 Subject: [PATCH 2/7] Avoid checking DNT for user-blocked domains. Saves extra work, as DNT compliance does not override user preference. --- src/js/webrequest.js | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/js/webrequest.js b/src/js/webrequest.js index 68390f72b9..5d95a4d4b8 100644 --- a/src/js/webrequest.js +++ b/src/js/webrequest.js @@ -116,9 +116,13 @@ function onBeforeRequest(details) { }; chrome.tabs.sendMessage(tab_id, msg); - window.setTimeout(function () { - badger.checkForDNTPolicy(requestDomain); - }, 10); + // if this is a heuristically- (not user-) blocked domain + if (requestAction == constants.BLOCK) { + // check for DNT policy + window.setTimeout(function () { + badger.checkForDNTPolicy(requestDomain); + }, 10); + } if (type == 'sub_frame' && badger.getSettings().getItem('hideBlockedElements')) { return { From 82fd5d899e2048437dd2921e498ea3a9ec00fc79 Mon Sep 17 00:00:00 2001 From: Alexei Date: Wed, 6 Dec 2017 18:24:03 -0500 Subject: [PATCH 3/7] Remove manually set and then unset domains. --- src/js/storage.js | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/js/storage.js b/src/js/storage.js index 9553d2ffb3..76668ef8f2 100644 --- a/src/js/storage.js +++ b/src/js/storage.js @@ -323,10 +323,18 @@ BadgerPen.prototype = { /** * Remove user set action from a domain - * @param domain FQDN string - **/ + * @param domain FQDN string + */ revertUserAction: function(domain) { this._setupDomainAction(domain, "", "userAction"); + + // if unsetting userAction returns the domain's action map to all defaults, + // remove the action map for the domain + const actionMap = this.getBadgerStorageObject("action_map"); + if (_.isEqual(actionMap.getItem(domain), _newActionMapObject())) { + log("Removing %s from action_map", domain); + actionMap.deleteItem(domain); + } } }; From 5438df474fbc562b92c2bc53c6f52d31cb1ed333 Mon Sep 17 00:00:00 2001 From: Alexei Date: Wed, 6 Dec 2017 18:25:32 -0500 Subject: [PATCH 4/7] Test that non-tracking domains do not get recorded --- tests/selenium/dnt_test.py | 41 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/tests/selenium/dnt_test.py b/tests/selenium/dnt_test.py index e865eaf591..48bd912ca8 100644 --- a/tests/selenium/dnt_test.py +++ b/tests/selenium/dnt_test.py @@ -19,6 +19,11 @@ class DNTTest(pbtest.PBSeleniumTest): """Tests to make sure DNT policy checking works as expected.""" + def domain_was_recorded(self, domain): + return self.js("""return ( + Object.keys(badger.storage.action_map.getItemClones()).indexOf('{}') != -1 +);""".format(domain)) + def domain_was_detected(self, domain): return self.js("""return ( Object.keys(badger.tabData).some(tab_id => {{ @@ -147,6 +152,42 @@ def test_dnt_check_should_not_send_cookies(self): result = self.js("return window.DNT_CHECK_RESULT;") self.assertTrue(result, "No cookies were sent") + def test_should_not_record_nontracking_domains(self): + TEST_URL = ( + "https://cdn.rawgit.com/ghostwords/" + "eef2c982fc3151e60a78136ca263294d/raw/13ed3d1e701994640b8d8065b835f8a9684ece92/" + "privacy_badger_recording_nontracking_domains_fixture.html" + ) + TRACKING_DOMAIN = "dnt-request-cookies-test.trackersimulator.org" + NON_TRACKING_DOMAIN = "dnt-test.trackersimulator.org" + + # open Badger's background page + self.load_url(self.bg_url, wait_on_site=1) + + # need to keep Badger's background page open to record what's happening + # so, open and switch to a new window + self.open_window() + + # visit a page containing two third-party resources, + # one from a cookie-tracking domain + # and one from a non-tracking domain + self.load_url(TEST_URL) + + # switch back to Badger's background page + switch_to_window_with_url(self.driver, self.bg_url) + + # verify that the cookie-tracking domain was recorded + self.assertTrue( + self.domain_was_recorded(TRACKING_DOMAIN), + "Tracking domain should have gotten recorded" + ) + + # verify that the non-tracking domain was not recorded + self.assertFalse( + self.domain_was_recorded(NON_TRACKING_DOMAIN), + "Non-tracking domain should not have gotten recorded" + ) + if __name__ == "__main__": unittest.main() From 8111e4180f4665a54e4bd28c07d6995848ad3d64 Mon Sep 17 00:00:00 2001 From: Alexei Date: Fri, 8 Dec 2017 15:41:01 -0500 Subject: [PATCH 5/7] Add migration to remove non-tracking domains. --- src/js/background.js | 1 + src/js/migrations.js | 14 ++++++++++++++ src/js/options.js | 3 +++ src/js/storage.js | 6 +++--- 4 files changed, 21 insertions(+), 3 deletions(-) diff --git a/src/js/background.js b/src/js/background.js index d7d536d77f..b1187e3b76 100644 --- a/src/js/background.js +++ b/src/js/background.js @@ -522,6 +522,7 @@ Badger.prototype = { Migrations.unblockIncorrectlyBlockedDomains, Migrations.forgetBlockedDNTDomains, Migrations.reapplyYellowlist, + Migrations.forgetNontrackingDomains, ]; for (var i = migrationLevel; i < migrations.length; i++) { diff --git a/src/js/migrations.js b/src/js/migrations.js index e39dbde596..d3ce68e095 100644 --- a/src/js/migrations.js +++ b/src/js/migrations.js @@ -212,6 +212,20 @@ exports.Migrations= { } }, + forgetNontrackingDomains: function (badger) { + console.log("Forgetting non-tracking domains ..."); + + const actionMap = badger.storage.getBadgerStorageObject("action_map"), + actions = actionMap.getItemClones(); + + for (const domain in actions) { + const map = actions[domain]; + if (map.userAction == "" && map.heuristicAction == "") { + actionMap.deleteItem(domain); + } + } + }, + }; diff --git a/src/js/options.js b/src/js/options.js index 1f205f2ff7..8a1cea0f49 100644 --- a/src/js/options.js +++ b/src/js/options.js @@ -191,6 +191,9 @@ function parseUserDataFile(storageMapsList) { // fix yellowlist getting out of sync migrations.reapplyYellowlist(badger); + // remove any non-tracking domains (in exports from older Badger versions) + migrations.forgetNontrackingDomains(badger); + // Update list to reflect new status of map reloadWhitelist(); refreshFilterPage(); diff --git a/src/js/storage.js b/src/js/storage.js index 76668ef8f2..02fc4c3e5b 100644 --- a/src/js/storage.js +++ b/src/js/storage.js @@ -328,10 +328,10 @@ BadgerPen.prototype = { revertUserAction: function(domain) { this._setupDomainAction(domain, "", "userAction"); - // if unsetting userAction returns the domain's action map to all defaults, - // remove the action map for the domain + // if Privacy Badger never recorded tracking for this domain, + // remove the domain's entry from Privacy Badger's database const actionMap = this.getBadgerStorageObject("action_map"); - if (_.isEqual(actionMap.getItem(domain), _newActionMapObject())) { + if (actionMap.getItem(domain).heuristicAction == "") { log("Removing %s from action_map", domain); actionMap.deleteItem(domain); } From 42f9daf99112fd695e57734217f9a4926d1dd60f Mon Sep 17 00:00:00 2001 From: Alexei Date: Fri, 8 Dec 2017 16:14:42 -0500 Subject: [PATCH 6/7] Skip DNT policy checking when importing user data. --- src/js/heuristicblocking.js | 41 ++++++++++++++++++++++--------------- src/js/storage.js | 3 ++- 2 files changed, 27 insertions(+), 17 deletions(-) diff --git a/src/js/heuristicblocking.js b/src/js/heuristicblocking.js index e9212880cc..2e82125f2f 100644 --- a/src/js/heuristicblocking.js +++ b/src/js/heuristicblocking.js @@ -136,24 +136,30 @@ HeuristicBlocker.prototype = { /** * Wraps _recordPrevalence for use outside of webRequest listeners. * - * @param tracker_fqdn The fully qualified domain name of the tracker - * @param page_origin The base domain of the page where the tracker - * was detected - * @returns {*} + * @param {String} tracker_fqdn The fully qualified domain name of the tracker + * @param {String} page_origin The base domain of the page + * where the tracker was detected. + * @param {Boolean} skip_dnt_check Skip DNT policy checking if flag is true. + * */ - updateTrackerPrevalence: function(tracker_fqdn, page_origin) { + updateTrackerPrevalence: function(tracker_fqdn, page_origin, skip_dnt_check) { // abort if we already made a decision for this fqdn let action = this.storage.getAction(tracker_fqdn); if (action != constants.NO_TRACKING && action != constants.ALLOW) { return; } - this._recordPrevalence(tracker_fqdn, window.getBaseDomain(tracker_fqdn), page_origin); + this._recordPrevalence( + tracker_fqdn, + window.getBaseDomain(tracker_fqdn), + page_origin, + skip_dnt_check + ); }, /** * Record HTTP request prevalence. Block a tracker if seen on more - * than [constants.TRACKING_THRESHOLD] pages + * than constants.TRACKING_THRESHOLD pages * * NOTE: This is a private function and should never be called directly. * All calls should be routed through heuristicBlockingAccounting for normal usage @@ -163,13 +169,14 @@ HeuristicBlocker.prototype = { * @param {String} tracker_fqdn The FQDN of the third party tracker * @param {String} tracker_origin Base domain of the third party tracker * @param {String} page_origin The origin of the page where the third party - * tracker was loaded + * tracker was loaded. + * @param {Boolean} skip_dnt_check Skip DNT policy checking if flag is true. */ - _recordPrevalence: function (tracker_fqdn, tracker_origin, page_origin) { - var snitch_map = this.storage.getBadgerStorageObject('snitch_map'); + _recordPrevalence: function (tracker_fqdn, tracker_origin, page_origin, skip_dnt_check) { + var snitchMap = this.storage.getBadgerStorageObject('snitch_map'); var firstParties = []; - if (snitch_map.hasItem(tracker_origin)) { - firstParties = snitch_map.getItem(tracker_origin); + if (snitchMap.hasItem(tracker_origin)) { + firstParties = snitchMap.getItem(tracker_origin); } if (firstParties.indexOf(page_origin) != -1) { @@ -179,13 +186,15 @@ HeuristicBlocker.prototype = { // Check this just-seen-tracking-on-this-site, // not-yet-blocked domain for DNT policy. // We check heuristically-blocked domains in webrequest.js. - window.setTimeout(function () { - badger.checkForDNTPolicy(tracker_fqdn); - }, 10); + if (!skip_dnt_check) { + window.setTimeout(function () { + badger.checkForDNTPolicy(tracker_fqdn); + }, 10); + } // record that we've seen this tracker on this domain (in snitch map) firstParties.push(page_origin); - snitch_map.setItem(tracker_origin, firstParties); + snitchMap.setItem(tracker_origin, firstParties); // ALLOW indicates this is a tracker still below TRACKING_THRESHOLD // (vs. NO_TRACKING for resources we haven't seen perform tracking yet). diff --git a/src/js/storage.js b/src/js/storage.js index 02fc4c3e5b..d9a0f7365a 100644 --- a/src/js/storage.js +++ b/src/js/storage.js @@ -499,7 +499,8 @@ BadgerStorage.prototype = { for (let origin in firstPartyOrigins) { badger.heuristicBlocking.updateTrackerPrevalence( tracker_fqdn, - firstPartyOrigins[origin] + firstPartyOrigins[origin], + true // skip DNT policy checking on data import ); } } From 1c0a3d242820514cf8dcd5617f6e84b1df4265f9 Mon Sep 17 00:00:00 2001 From: Alexei Date: Thu, 18 Jan 2018 16:12:02 -0500 Subject: [PATCH 7/7] Fix some JSDoc type expressions. --- src/js/background.js | 2 +- src/js/storage.js | 20 ++++++++++---------- src/js/surrogates.js | 2 +- src/js/webrequest.js | 2 +- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/js/background.js b/src/js/background.js index b1187e3b76..d8cdcb10c0 100644 --- a/src/js/background.js +++ b/src/js/background.js @@ -214,7 +214,7 @@ Badger.prototype = { * @param {Integer} [frame_id=0] Frame ID to check for. * Optional, defaults to frame 0 (the main document frame). * - * @returns {Object|null} Frame data object or null + * @returns {?Object} Frame data object or null */ getFrameData: function (tab_id, frame_id) { let self = this; diff --git a/src/js/storage.js b/src/js/storage.js index d9a0f7365a..3f4e7ea4d3 100644 --- a/src/js/storage.js +++ b/src/js/storage.js @@ -94,7 +94,7 @@ BadgerPen.prototype = { * Get the current presumed action for a specific fully qualified domain name (FQDN), * ignoring any rules for subdomains below or above it * - * @param {Object|String} domain domain object from action_map + * @param {(Object|String)} domain domain object from action_map * @returns {String} the presumed action for this FQDN **/ getAction: function (domain, ignoreDNT) { @@ -244,8 +244,8 @@ BadgerPen.prototype = { /** * Get the number of domains that the given FQDN has been seen tracking on * - * @param fqdn domain to check status of - * @return int the number of domains fqdn has been tracking on + * @param {String} fqdn domain to check status of + * @return {Integer} the number of domains fqdn has been tracking on */ getTrackingCount: function(fqdn) { var snitch_map = this.getBadgerStorageObject('snitch_map'); @@ -259,9 +259,9 @@ BadgerPen.prototype = { /** * Set up an action for a domain of the given action type in action_map * - * @param domain the domain to set the action for - * @param action the action to take e.g. BLOCK || COOKIEBLOCK || DNT - * @param actionType the type of action we are setting, one of "userAction", "heuristicAction", "dnt" + * @param {String} domain the domain to set the action for + * @param {String} action the action to take e.g. BLOCK || COOKIEBLOCK || DNT + * @param {String} actionType the type of action we are setting, one of "userAction", "heuristicAction", "dnt" * @private */ _setupDomainAction: function (domain, action, actionType) { @@ -305,7 +305,7 @@ BadgerPen.prototype = { /** * Remove DNT setting from a domain* - * @param domain FQDN string + * @param {String} domain FQDN string */ revertDNT: function(domain) { this._setupDomainAction(domain, false, "dnt"); @@ -323,7 +323,7 @@ BadgerPen.prototype = { /** * Remove user set action from a domain - * @param domain FQDN string + * @param {String} domain FQDN string */ revertUserAction: function(domain) { this._setupDomainAction(domain, "", "userAction"); @@ -392,7 +392,7 @@ BadgerStorage.prototype = { * Check if this storage object has an item * * @param {String} key - the key for the item - * @return boolean + * @return {Boolean} **/ hasItem: function(key) { var self = this; @@ -403,7 +403,7 @@ BadgerStorage.prototype = { * Get an item * * @param {String} key - the key for the item - * @return the value for that key or null + * @return {?*} the value for that key or null **/ getItem: function(key) { var self = this; diff --git a/src/js/surrogates.js b/src/js/surrogates.js index 3095103583..5389afd82d 100644 --- a/src/js/surrogates.js +++ b/src/js/surrogates.js @@ -34,7 +34,7 @@ const db = require('surrogatedb'); * parameter. This is an optimization: the calling context should already have * this information. * - * @return {String|Boolean} The surrogate script as a data URI when there is a + * @return {(String|Boolean)} The surrogate script as a data URI when there is a * match, or boolean false when there is no match. */ function getSurrogateURI(script_url, script_hostname) { diff --git a/src/js/webrequest.js b/src/js/webrequest.js index 5d95a4d4b8..9db3965ebf 100644 --- a/src/js/webrequest.js +++ b/src/js/webrequest.js @@ -487,7 +487,7 @@ function forgetTab(tabId) { * @param {Integer} tabId The relevant tab * @param {String} requestHost The FQDN * @param {Integer} frameId The id of the frame - * @returns {String|Boolean} false or the action to take + * @returns {(String|Boolean)} false or the action to take */ function checkAction(tabId, requestHost, frameId) { // Ignore requests from temporarily unblocked social widgets.