Skip to content

Commit

Permalink
Merge pull request #20 from Eytan414/#4-filter-for-chained-nodes
Browse files: browse the repository at this point in the history
Remove chained ads when a regex filter matches
  • Loading branch information
DrKain authored Aug 27, 2022
2 parents 589525a + 60b3de6 commit 901ba95
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 5 deletions.
4 changes: 4 additions & 0 deletions lib/interface.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ export interface IArguments {
* Do not log anything to the console
*/
silent: boolean;
/**
* Attempt to remove chained ads: runs of consecutive cues that build up an
* ad piece by piece (e.g. "s", "su", "sub", "subt", ...).
*/
chainedads: boolean;
/**
* Expects directory. This will clean multiple files across multiple directories and subdirectories.
* Use the depth parameter to limit how many directories deep subclean will look.
Expand Down
29 changes: 24 additions & 5 deletions src/index.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#! /usr/bin/env node
import { statSync, existsSync, readdirSync, readFileSync, writeFileSync, mkdirSync, unlinkSync } from 'fs';
import { dirname, join, resolve, extname, basename } from 'path';
import { parseSync, stringifySync, Format } from 'subtitle';
import { parseSync, stringifySync, Format, Cue, NodeList } from 'subtitle';
import { help_text } from './help';
import { get } from 'https';
import { IArguments } from './interface';
Expand Down Expand Up @@ -37,6 +37,7 @@ class SubClean {
silent: argv.silent || argv.s || false,
version: argv.version || argv.v || false,
update: argv.update || false,
chainedads: argv.a || argv.chainedads || false,
sweep: argv.sweep || '',
depth: argv.depth ?? 10,
ne: argv['ne'] || false,
Expand Down Expand Up @@ -302,26 +303,44 @@ class SubClean {
// Remove all cases of \r (parser can not handle these)
fileData = fileData.replace(/\r/g, ' ');

const nodes = parseSync(fileData);
const nodes:NodeList = parseSync(fileData);
let hits = 0;

// For debugging
this.nodes_count += nodes.length;

//Chained ads flag
const checkForChainedAds:boolean = item.chainedads;

// Remove ads
nodes.forEach((node: any, index) => {
nodes.forEach((node: any, index:number) => {
this.blacklist.forEach((mark: any) => {
let regex = null;
this.actions_count++;
const nodeText:string = node.data.text;

if (mark.startsWith('/') && mark.endsWith('/')) {
// remove first and last characters
regex = new RegExp(mark.substring(1, mark.length - 1), 'i');
if (regex.exec(node.data.text)) {
if (regex.exec(nodeText)) {
this.log(`[Match] Advertising found in node ${index + 1} (${mark})`);
if (this.args.debug) this.log('[Line] ' + node.data.text);
if (this.args.debug) this.log('[Line] ' + nodeText);
hits++;
node.data.text = '';

if (index > 0 && checkForChainedAds){
const previousNodeText = (nodes[index-1].data as Cue).text;
if(nodeText.includes(previousNodeText)){
for(let i=index-1; i>0; i--){
const currentIterationText = (nodes[i].data as Cue).text;
if(currentIterationText.length === 0) continue; //ignore empty string nodes
if(!nodeText.includes(currentIterationText)) break;//chain stopped

hits++;
(nodes[i].data as Cue).text = '';
}
}
}
}
} else {
if (node.data.text.toLowerCase().includes(mark)) {
Expand Down
4 changes: 4 additions & 0 deletions src/interface.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ export interface IArguments {
* Do not log anything to the console
*/
silent: boolean;
/**
* Attempt to remove chained ads: runs of consecutive cues that build up an
* ad piece by piece (e.g. "s", "su", "sub", "subt", ...).
*/
chainedads: boolean;
/**
* Expects directory. This will clean multiple files across multiple directories and subdirectories.
* Use the depth parameter to limit how many directories deep subclean will look.
Expand Down

0 comments on commit 901ba95

Please sign in to comment.