Skip to content

Commit

Permalink
feat(parameter): use re2js instead (DIYgod#13072)
Browse files Browse the repository at this point in the history
* feat(parameter): use re2js instead

* fix: add back comment
  • Loading branch information
Tony authored and auto-bot-ty committed Aug 19, 2023
1 parent 84b7d3f commit 48782f3
Show file tree
Hide file tree
Showing 5 changed files with 56 additions and 57 deletions.
4 changes: 2 additions & 2 deletions lib/middleware/anti-hotlink.js
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,8 @@ const validateTemplate = (template) => {
module.exports = async (ctx, next) => {
await next();

let image_hotlink_template = undefined;
let multimedia_hotlink_template = undefined;
let image_hotlink_template;
let multimedia_hotlink_template;
const shouldWrapInIframe = ctx.query.wrap_multimedia_in_iframe === '1';

// Read params if enabled
Expand Down
94 changes: 47 additions & 47 deletions lib/middleware/parameter.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@ const cheerio = require('cheerio');
const { simplecc } = require('simplecc-wasm');
const got = require('@/utils/got');
const config = require('@/config').value;
const { RE2 } = require('re2-wasm');
const { resolve } = require('path');
const { RE2JS } = require('re2js');

let mercury_parser;

Expand All @@ -28,17 +27,6 @@ module.exports = async (ctx, next) => {
await next();

if (!ctx.state.data && !ctx._matchedRoute) {
// Given that the official demo has a cache TTL of 2h, a "wrong path" page will be cached by Cloudflare for
// 200h (8.33d).
// What makes it worse is that the documentation contains status badges to detect the availability of routes,
// but the documentation is updated in a more timely manner than the official demo, so every example path of
// every new route will probably have a "wrong path" page cached for at least 200h soon after being accepted.
// That is to say, the example paths of a new route will probably be unavailable on the public demo for the
// first 200h after being accepted.
// In conclusion, the next 3 lines have been commented out (exactly the same behavior as any internal error).
// ctx.set({
// 'Cache-Control': `public, max-age=${config.cache.routeExpire * 100}`,
// });
throw Error('wrong path');
}

Expand Down Expand Up @@ -160,41 +148,37 @@ module.exports = async (ctx, next) => {
}

// filter
const engine = config.feature.filter_regex_engine;
const makeRegex = (string) => {
if (!string) {
return null;
}
// default: case_sensitive = true
const engine = config.feature.filter_regex_engine;
if (ctx.query.filter_case_sensitive === 'false') {
switch (engine) {
case 'regexp':
return new RegExp(string, 'i');
case 're2':
return new RE2(string, 'iu');
default:
throw Error(`Invalid Engine Value: ${engine}, please check your config.`);
}
} else {
switch (engine) {
case 'regexp':
return new RegExp(string);
case 're2':
return new RE2(string, 'u');
default:
throw Error(`Invalid Engine Value: ${engine}, please check your config.`);
}
const insensitive = ctx.query.filter_case_sensitive === 'false';
switch (engine) {
case 'regexp':
return new RegExp(string, insensitive ? 'i' : '');
case 're2':
return RE2JS.compile(string, insensitive ? RE2JS.CASE_INSENSITIVE : 0);
default:
throw Error(`Invalid Engine Value: ${engine}, please check your config.`);
}
};

if (ctx.query.filter) {
// workaround for @vercel/nft removing wasm file
resolve('node_modules/re2-wasm/build/wasm/re2.wasm');
const regex = makeRegex(ctx.query.filter);

ctx.state.data.item = ctx.state.data.item.filter((item) => {
const title = item.title || '';
const description = item.description || title;
const author = item.author || '';
const category = item.category ? (Array.isArray(item.category) ? item.category : [item.category]) : [];
const categoryArray = Array.isArray(item.category) ? item.category : [item.category];
const category = item.category ? categoryArray : [];
const isFilter =
title.match(makeRegex(ctx.query.filter)) || description.match(makeRegex(ctx.query.filter)) || author.match(makeRegex(ctx.query.filter)) || category.some((c) => c.match(makeRegex(ctx.query.filter)));
engine === 're2'
? regex.matcher(title).find() || regex.matcher(description).find() || regex.matcher(author).find() || category.some((c) => regex.matcher(c).find())
: title.match(regex) || description.match(regex) || author.match(regex) || category.some((c) => c.match(regex));

return isFilter;
});
}
Expand All @@ -205,12 +189,20 @@ module.exports = async (ctx, next) => {
const title = item.title || '';
const description = item.description || title;
const author = item.author || '';
const category = item.category ? (Array.isArray(item.category) ? item.category : [item.category]) : [];
const categoryArray = Array.isArray(item.category) ? item.category : [item.category];
const category = item.category ? categoryArray : [];
let isFilter = true;
ctx.query.filter_title && (isFilter = title.match(makeRegex(ctx.query.filter_title)));
ctx.query.filter_description && (isFilter = isFilter && description.match(makeRegex(ctx.query.filter_description)));
ctx.query.filter_author && (isFilter = isFilter && author.match(makeRegex(ctx.query.filter_author)));
ctx.query.filter_category && (isFilter = isFilter && category.some((c) => c.match(makeRegex(ctx.query.filter_category))));

const titleRegex = makeRegex(ctx.query.filter_title);
const descriptionRegex = makeRegex(ctx.query.filter_description);
const authorRegex = makeRegex(ctx.query.filter_author);
const categoryRegex = makeRegex(ctx.query.filter_category);

ctx.query.filter_title && (isFilter = engine === 're2' ? titleRegex.matcher(title).find() : title.match(titleRegex));
ctx.query.filter_description && (isFilter = isFilter && (engine === 're2' ? descriptionRegex.matcher(description).find() : description.match(descriptionRegex)));
ctx.query.filter_author && (isFilter = isFilter && (engine === 're2' ? authorRegex.matcher(author).find() : author.match(authorRegex)));
ctx.query.filter_category && (isFilter = isFilter && category.some((c) => (engine === 're2' ? categoryRegex.matcher(c).find() : c.match(categoryRegex))));

return isFilter;
});
}
Expand All @@ -224,12 +216,20 @@ module.exports = async (ctx, next) => {
const title = item.title;
const description = item.description || title;
const author = item.author || '';
const category = item.category ? (Array.isArray(item.category) ? item.category : [item.category]) : [];
const categoryArray = Array.isArray(item.category) ? item.category : [item.category];
const category = item.category ? categoryArray : [];
let isFilter = true;
ctx.query.filterout_title && (isFilter = !title.match(makeRegex(ctx.query.filterout_title)));
ctx.query.filterout_description && (isFilter = isFilter && !description.match(makeRegex(ctx.query.filterout_description)));
ctx.query.filterout_author && (isFilter = isFilter && !author.match(makeRegex(ctx.query.filterout_author)));
ctx.query.filterout_category && (isFilter = isFilter && !category.some((c) => c.match(makeRegex(ctx.query.filterout_category))));

const titleRegex = makeRegex(ctx.query.filterout_title);
const descriptionRegex = makeRegex(ctx.query.filterout_description);
const authorRegex = makeRegex(ctx.query.filterout_author);
const categoryRegex = makeRegex(ctx.query.filterout_category);

ctx.query.filterout_title && (isFilter = engine === 're2' ? !titleRegex.matcher(title).find() : !title.match(titleRegex));
ctx.query.filterout_description && (isFilter = isFilter && (engine === 're2' ? !descriptionRegex.matcher(description).find() : !description.match(descriptionRegex)));
ctx.query.filterout_author && (isFilter = isFilter && (engine === 're2' ? !authorRegex.matcher(author).find() : !author.match(authorRegex)));
ctx.query.filterout_category && (isFilter = isFilter && !category.some((c) => (engine === 're2' ? categoryRegex.matcher(c).find() : c.match(categoryRegex))));

return isFilter;
});
}
Expand Down Expand Up @@ -308,7 +308,7 @@ module.exports = async (ctx, next) => {
if (item.description) {
text = item.description.replace(/<\/?[^>]+(>|$)/g, '');
}
if (text && text.length) {
if (text?.length) {
if (text.length > ctx.query.brief) {
item.description = `<p>${text.substring(0, ctx.query.brief)}…</p>`;
} else {
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@
"puppeteer-extra-plugin-user-preferences": "2.4.1",
"query-string": "7.1.3",
"rand-user-agent": "1.0.109",
"re2-wasm": "1.0.2",
"re2js": "0.3.2",
"require-all": "3.0.0",
"rfc4648": "1.5.2",
"rss-parser": "3.13.0",
Expand Down
11 changes: 5 additions & 6 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion test/middleware/filter-engine.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ describe('filter-engine', () => {

const response = await request.get('/test/1?filter=abc(%3F%3Ddef)');
expect(response.status).toBe(404);
expect(response.text).toMatch(/SyntaxError/);
expect(response.text).toMatch(/RE2JSSyntaxException/);
server.close();
});

Expand Down

0 comments on commit 48782f3

Please sign in to comment.