Skip to content

Commit

Permalink
Add support for regex-based values as target domain for static extend…
Browse files Browse the repository at this point in the history
…ed filters

Related discussion:
- uBlockOrigin/uBlock-issues#2234

Example of usage:

    /img[a-z]{3,5}\.buzz/##+js(nowoif)

Use sparingly, when no other solution is practical from a maintenance point
of view -- keeping in mind that uBO has to iterate through all the regex-based
values, unlike plain hostname or entity-based values which are mere lookups.

Related commit:
- b1de8d3
  • Loading branch information
gorhill committed Jan 31, 2023
1 parent c455490 commit 8149847
Show file tree
Hide file tree
Showing 5 changed files with 73 additions and 46 deletions.
26 changes: 14 additions & 12 deletions src/js/cosmetic-filtering.js
Original file line number Diff line number Diff line change
Expand Up @@ -811,31 +811,33 @@ FilterContainer.prototype.retrieveSpecificSelectors = function(
}

// Retrieve filters with a non-empty hostname
const retrieveSets = [ specificSet, exceptionSet, proceduralSet, exceptionSet ];
const discardSets = [ dummySet, exceptionSet ];
this.specificFilters.retrieve(
hostname,
options.noSpecificCosmeticFiltering !== true
? [ specificSet, exceptionSet, proceduralSet, exceptionSet ]
: [ dummySet, exceptionSet ],
options.noSpecificCosmeticFiltering ? discardSets : retrieveSets,
1
);
// Retrieve filters with an empty hostname
// Retrieve filters with a regex-based hostname value
this.specificFilters.retrieve(
hostname,
options.noGenericCosmeticFiltering !== true
? [ specificSet, exceptionSet, proceduralSet, exceptionSet ]
: [ dummySet, exceptionSet ],
2
options.noSpecificCosmeticFiltering ? discardSets : retrieveSets,
3
);
// Retrieve filters with a non-empty entity
// Retrieve filters with an entity-based hostname value
if ( request.entity !== '' ) {
this.specificFilters.retrieve(
`${hostname.slice(0, -request.domain.length)}${request.entity}`,
options.noSpecificCosmeticFiltering !== true
? [ specificSet, exceptionSet, proceduralSet, exceptionSet ]
: [ dummySet, exceptionSet ],
options.noSpecificCosmeticFiltering ? discardSets : retrieveSets,
1
);
}
// Retrieve filters with an empty hostname
this.specificFilters.retrieve(
hostname,
options.noGenericCosmeticFiltering ? discardSets : retrieveSets,
2
);

if ( exceptionSet.size !== 0 ) {
out.exceptionFilters = Array.from(exceptionSet);
Expand Down
16 changes: 3 additions & 13 deletions src/js/html-filtering.js
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ import logger from './logger.js';
import µb from './background.js';
import { sessionFirewall } from './filtering-engines.js';
import { StaticExtFilteringHostnameDB } from './static-ext-filtering-db.js';
import * as sfp from './static-filtering-parser.js';

/******************************************************************************/

Expand Down Expand Up @@ -315,9 +314,6 @@ htmlFilteringEngine.freeze = function() {

htmlFilteringEngine.compile = function(parser, writer) {
const isException = parser.isException();
const root = parser.getBranchFromType(sfp.NODE_TYPE_EXT_PATTERN_HTML);
const headerName = parser.getNodeString(root);

const { raw, compiled } = parser.result;
if ( compiled === undefined ) {
const who = writer.properties.get('name') || '?';
Expand Down Expand Up @@ -380,19 +376,13 @@ htmlFilteringEngine.retrieve = function(details) {
const plains = new Set();
const procedurals = new Set();
const exceptions = new Set();
const retrieveSets = [ plains, exceptions, procedurals, exceptions ];

filterDB.retrieve(
hostname,
[ plains, exceptions, procedurals, exceptions ]
);
filterDB.retrieve(hostname, retrieveSets);
const entity = details.entity !== ''
? `${hostname.slice(0, -details.domain.length)}${details.entity}`
: '*';
filterDB.retrieve(
entity,
[ plains, exceptions, procedurals, exceptions ],
1
);
filterDB.retrieve(entity, retrieveSets, 1);

if ( plains.size === 0 && procedurals.size === 0 ) { return; }

Expand Down
11 changes: 8 additions & 3 deletions src/js/reverselookup-worker.js
Original file line number Diff line number Diff line change
Expand Up @@ -150,9 +150,14 @@ const fromExtendedFilter = function(details) {
}

const hostnameMatches = hn => {
return hn === '' ||
reHostname.test(hn) ||
reEntity !== undefined && reEntity.test(hn);
if ( hn === '' ) { return true; }
if ( hn.charCodeAt(0) === 0x2F /* / */ ) {
return (new RegExp(hn.slice(1,-1))).test(hostname);
}
if ( reHostname.test(hn) ) { return true; }
if ( reEntity === undefined ) { return false; }
if ( reEntity.test(hn) ) { return true; }
return false;
};

const response = Object.create(null);
Expand Down
61 changes: 46 additions & 15 deletions src/js/static-ext-filtering-db.js
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ const StaticExtFilteringHostnameDB = class {
this.timer = undefined;
this.strToIdMap = new Map();
this.hostnameToSlotIdMap = new Map();
this.regexToSlotIdMap = new Map();
this.regexMap = new Map();
// Array of integer pairs
this.hostnameSlots = [];
// Array of strings (selectors and pseudo-selectors)
Expand All @@ -51,9 +53,16 @@ const StaticExtFilteringHostnameDB = class {
}
}
const strId = iStr << this.nBits | bits;
let iHn = this.hostnameToSlotIdMap.get(hn);
const hnIsNotRegex = hn.charCodeAt(0) !== 0x2F /* / */;
let iHn = hnIsNotRegex
? this.hostnameToSlotIdMap.get(hn)
: this.regexToSlotIdMap.get(hn);
if ( iHn === undefined ) {
this.hostnameToSlotIdMap.set(hn, this.hostnameSlots.length);
if ( hnIsNotRegex ) {
this.hostnameToSlotIdMap.set(hn, this.hostnameSlots.length);
} else {
this.regexToSlotIdMap.set(hn, this.hostnameSlots.length);
}
this.hostnameSlots.push(strId, 0);
return;
}
Expand All @@ -67,9 +76,11 @@ const StaticExtFilteringHostnameDB = class {

clear() {
this.hostnameToSlotIdMap.clear();
this.regexToSlotIdMap.clear();
this.hostnameSlots.length = 0;
this.strSlots.length = 0;
this.strToIdMap.clear();
this.regexMap.clear();
this.size = 0;
}

Expand All @@ -92,39 +103,55 @@ const StaticExtFilteringHostnameDB = class {
);
}

// modifiers = 1: return only specific items
// modifiers = 2: return only generic items
// modifiers = 0: all items
// modifiers = 1: only specific items
// modifiers = 2: only generic items
// modifiers = 3: only regex-based items
//
retrieve(hostname, out, modifiers = 0) {
if ( modifiers === 2 ) {
hostname = '';
}
let hn = hostname;
if ( modifiers === 2 ) { hn = ''; }
const mask = out.length - 1; // out.length must be power of two
for (;;) {
let iHn = this.hostnameToSlotIdMap.get(hostname);
let iHn = this.hostnameToSlotIdMap.get(hn);
if ( iHn !== undefined ) {
do {
const strId = this.hostnameSlots[iHn+0];
out[strId & mask].add(
this.strSlots[strId >>> this.nBits]
);
out[strId & mask].add(this.strSlots[strId >>> this.nBits]);
iHn = this.hostnameSlots[iHn+1];
} while ( iHn !== 0 );
}
if ( hostname === '' ) { break; }
const pos = hostname.indexOf('.');
if ( hn === '' ) { break; }
const pos = hn.indexOf('.');
if ( pos === -1 ) {
if ( modifiers === 1 ) { break; }
hostname = '';
hn = '';
} else {
hostname = hostname.slice(pos + 1);
hn = hn.slice(pos + 1);
}
}
if ( modifiers !== 0 && modifiers !== 3 ) { return; }
// TODO: consider using a combined regex to test once for whether
// iterating is worth it.
for ( const restr of this.regexToSlotIdMap.keys() ) {
let re = this.regexMap.get(restr);
if ( re === undefined ) {
this.regexMap.set(restr, (re = new RegExp(restr.slice(1,-1))));
}
if ( re.test(hostname) === false ) { continue; }
let iHn = this.regexToSlotIdMap.get(restr);
do {
const strId = this.hostnameSlots[iHn+0];
out[strId & mask].add(this.strSlots[strId >>> this.nBits]);
iHn = this.hostnameSlots[iHn+1];
} while ( iHn !== 0 );
}
}

toSelfie() {
return {
hostnameToSlotIdMap: Array.from(this.hostnameToSlotIdMap),
regexToSlotIdMap: Array.from(this.regexToSlotIdMap),
hostnameSlots: this.hostnameSlots,
strSlots: this.strSlots,
size: this.size
Expand All @@ -134,6 +161,10 @@ const StaticExtFilteringHostnameDB = class {
fromSelfie(selfie) {
if ( selfie === undefined ) { return; }
this.hostnameToSlotIdMap = new Map(selfie.hostnameToSlotIdMap);
// Regex-based lookup available in uBO 1.47.0 and above
if ( Array.isArray(selfie.regexToSlotIdMap) ) {
this.regexToSlotIdMap = new Map(selfie.regexToSlotIdMap);
}
this.hostnameSlots = selfie.hostnameSlots;
this.strSlots = selfie.strSlots;
this.size = selfie.size;
Expand Down
5 changes: 2 additions & 3 deletions src/js/static-filtering-parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -1066,8 +1066,7 @@ export class AstFilterParser {
realBad = true;
break;
case NODE_TYPE_NET_OPTION_NAME_WEBRTC:
bad = true;
realBad = isNegated || hasValue;
realBad = true;
break;
case NODE_TYPE_NET_PATTERN:
realBad = this.hasOptions() === false &&
Expand Down Expand Up @@ -1784,7 +1783,7 @@ export class AstFilterParser {
);
this.addFlags(AST_FLAG_HAS_OPTIONS);
this.addNodeToRegister(NODE_TYPE_EXT_OPTIONS, next);
this.linkDown(next, this.parseDomainList(next, ',', 0b01110));
this.linkDown(next, this.parseDomainList(next, ',', 0b11110));
prev = this.linkRight(prev, next);
}
next = this.allocTypedNode(
Expand Down

0 comments on commit 8149847

Please sign in to comment.