Skip to content

Commit

Permalink
Add pseudo-operator :matches-attr()
Browse files Browse the repository at this point in the history
Related issue:
- uBlockOrigin/uBlock-issues#2329

The supported syntax is exactly as per AdGuard's documentation:
- https://kb.adguard.com/en/general/how-to-create-your-own-ad-filters#extended-css-matches-attr

Though recommended, the quotes are not mandatory in uBO if
the argument does not cause the parser to fail and if there
are no ambiguities.

Additionally, improved the code to better unquote pseudo-operator
arguments, and to bring it closer to how AdGuard does it as per
documentation. When using quotes, `"` and `\` should be escaped
to preserve these characters in the unquoted version of the
argument.

Additionally, it is now possible to have `:has-text()` match the
empty string by just quoting the empty string:

    ...##foo:has-text("")
  • Loading branch information
gorhill committed Dec 2, 2022
1 parent 6140e55 commit 76d7010
Show file tree
Hide file tree
Showing 2 changed files with 96 additions and 12 deletions.
30 changes: 30 additions & 0 deletions src/js/contentscript-extra.js
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,19 @@ const nonVisualElements = {
style: true,
};

// Build a RegExp from the textual argument of a procedural operator.
//
// - An empty string yields a regex matching anything.
// - A double-quoted argument is unquoted first; `\"` and `\\` inside the
//   quotes are unescaped to `"` and `\`.
// - `/.../` or `/.../i` is taken as a regex literal and compiled as-is.
// - Anything else is matched as case-insensitive plain text, anchored at
//   both ends when `exact` is true.
const regexFromString = (s, exact = false) => {
    if ( s === '' ) { return /^/; }
    let text = s;
    const isQuoted = /^".+"$/.test(text);
    if ( isQuoted ) {
        text = text.slice(1,-1).replace(/\\(\\|")/g, '$1');
    }
    const literal = /^\/(.+)\/([i]?)$/.exec(text);
    if ( literal === null ) {
        // Plain text: neutralize characters which are special in a regex.
        const escaped = text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
        const pattern = exact ? `^${escaped}$` : escaped;
        return new RegExp(pattern, 'i');
    }
    const [ , body, flags ] = literal;
    return new RegExp(body, flags || undefined);
};

// 'P' stands for 'Procedural'

class PSelectorTask {
Expand Down Expand Up @@ -85,6 +98,22 @@ class PSelectorIfNotTask extends PSelectorIfTask {
}
PSelectorIfNotTask.prototype.target = false;

// Procedural task for the `:matches-attr()` operator.
//
// A node is kept when at least one of its attributes has a name matching
// `reAttr` and a value matching `reValue`. Both regexes are built with
// `regexFromString(..., true)` from `task[1].attr` and `task[1].value`.
class PSelectorMatchesAttrTask extends PSelectorTask {
    constructor(task) {
        super();
        this.reAttr = regexFromString(task[1].attr, true);
        this.reValue = regexFromString(task[1].value, true);
    }
    // Append `node` to `output` if it has a matching attribute.
    transpose(node, output) {
        // Nodes which are not elements have no attributes to inspect.
        if ( typeof node.getAttributeNames !== 'function' ) { return; }
        for ( const attr of node.getAttributeNames() ) {
            if ( this.reAttr.test(attr) === false ) { continue; }
            if ( this.reValue.test(node.getAttribute(attr)) === false ) { continue; }
            output.push(node);
            // One matching attribute is enough: stop here so that the node
            // is not pushed again for every other matching attribute.
            return;
        }
    }
}

class PSelectorMatchesCSSTask extends PSelectorTask {
constructor(task) {
super();
Expand Down Expand Up @@ -350,6 +379,7 @@ class PSelector {
[ 'has-text', PSelectorHasTextTask ],
[ 'if', PSelectorIfTask ],
[ 'if-not', PSelectorIfNotTask ],
[ 'matches-attr', PSelectorMatchesAttrTask ],
[ 'matches-css', PSelectorMatchesCSSTask ],
[ 'matches-css-after', PSelectorMatchesCSSAfterTask ],
[ 'matches-css-before', PSelectorMatchesCSSBeforeTask ],
Expand Down
78 changes: 66 additions & 12 deletions src/js/static-filtering-parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -1361,6 +1361,7 @@ Parser.prototype.SelectorCompiler = class {
'has-text',
'if',
'if-not',
'matches-attr',
'matches-css',
'matches-css-after',
'matches-css-before',
Expand Down Expand Up @@ -1855,6 +1856,8 @@ Parser.prototype.SelectorCompiler = class {
return this.compileSelector(arg);
case 'if-not':
return this.compileSelector(arg);
case 'matches-attr':
return this.compileMatchAttrArgument(arg);
case 'matches-css':
return this.compileCSSDeclaration(arg);
case 'matches-css-after':
Expand Down Expand Up @@ -1894,31 +1897,81 @@ Parser.prototype.SelectorCompiler = class {
return false;
}

extractArg(s) {
if ( /^(['"]).+\1$/.test(s) ) {
s = s.slice(1, -1);
unquoteString(s) {
const end = s.length;
if ( end === 0 ) {
return { s: '', end };
}
return s.replace(/\\(['"])/g, '$1');
if ( /^['"]/.test(s) === false ) {
return { s, i: end };
}
const quote = s.charCodeAt(0);
const out = [];
let i = 1, c = 0;
for (;;) {
c = s.charCodeAt(i);
if ( c === quote ) {
i += 1;
break;
}
if ( c === 0x5C /* '\\' */ ) {
i += 1;
if ( i === end ) { break; }
c = s.charCodeAt(i);
if ( c !== 0x5C && c !== quote ) {
out.push('\\');
}
}
out.push(c);
i += 1;
if ( i === end ) { break; }
}
return { s: String.fromCharCode(...out), i };
}

compileMatchAttrArgument(s) {
if ( s === '' ) { return; }
let attr = '', value = '';
let r = this.unquoteString(s);
if ( r.i === s.length ) {
const pos = r.s.indexOf('=');
if ( pos === -1 ) {
attr = r.s;
} else {
attr = r.s.slice(0, pos);
value = r.s.slice(pos + 1);
}
} else {
attr = r.s;
if ( s.charCodeAt(r.i) !== 0x3D ) { return; }
r = this.unquoteString(s.slice(r.i+1));
value = r.s;
}
if ( attr === '' ) { return; }
return { attr, value };
}

// When dealing with literal text, we must first eat _some_
// backslash characters.
// Remove potentially present quotes before processing.
compileText(s) {
if ( s === '' ) { return; }
s = this.extractArg(s);
const match = this.reParseRegexLiteral.exec(s);
const r = this.unquoteString(s);
if ( r.i !== s.length ) { return; }
const match = this.reParseRegexLiteral.exec(r.s);
let regexDetails;
if ( match !== null ) {
regexDetails = match[1];
if ( this.isBadRegex(regexDetails) ) { return; }
if ( match[2] ) {
regexDetails = [ regexDetails, match[2] ];
}
} else if ( r.s === '' ) {
regexDetails = '^$';
} else {
regexDetails = s.replace(this.reEatBackslashes, '$1')
.replace(this.reEscapeRegex, '\\$&');
this.regexToRawValue.set(regexDetails, s);
regexDetails = r.s.replace(this.reEatBackslashes, '$1')
.replace(this.reEscapeRegex, '\\$&');
this.regexToRawValue.set(regexDetails, r.s);
}
return regexDetails;
}
Expand Down Expand Up @@ -2010,13 +2063,14 @@ Parser.prototype.SelectorCompiler = class {
}

compileXpathExpression(s) {
s = this.extractArg(s);
const r = this.unquoteString(s);
if ( r.i !== s.length ) { return; }
try {
self.document.createExpression(s, null);
self.document.createExpression(r.s, null);
} catch (e) {
return;
}
return s;
return r.s;
}
};

Expand Down

0 comments on commit 76d7010

Please sign in to comment.