Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature: Accept GDPR banner consent. #372

Merged
merged 11 commits into from
Mar 28, 2024
8 changes: 7 additions & 1 deletion packages/cli/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,10 @@
.option(
'-d, --out-dir <value>',
'Directory path where the analysis data will be stored'
)
.option(
'-ab, --accept-banner',
'This will accept the GDPR banner if present.'
);

program.parse();
Expand Down Expand Up @@ -116,6 +120,7 @@
const shouldSkipPrompts = !program.opts().prompts;
const shouldSkipTechnologyAnalysis = !program.opts().technology;
const outDir = program.opts().outDir;
const shouldSkipAcceptBanner = program.opts().acceptBanner;

validateArgs(
url,
Expand Down Expand Up @@ -187,14 +192,15 @@
DELAY_TIME,
cookieDictionary,
3,
urlsToProcess.length !== 1 ? spinnies : undefined
urlsToProcess.length !== 1 ? spinnies : undefined,
shouldSkipAcceptBanner
);

spinnies.succeed('cookie-spinner', {
text: 'Done analyzing cookies.',
});

let technologyAnalysisData: any = null;

Check warning on line 203 in packages/cli/src/index.ts

View workflow job for this annotation

GitHub Actions / Lint

Unexpected any. Specify a different type

if (!shouldSkipTechnologyAnalysis) {
spinnies.add('technology-spinner', {
Expand Down
8 changes: 6 additions & 2 deletions packages/cli/src/procedures/analyzeCookieUrls.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ export const analyzeCookiesUrls = async (
urls: string[],
isHeadless: boolean,
delayTime: number,
cookieDictionary: CookieDatabase
cookieDictionary: CookieDatabase,
shouldSkipAcceptBanner: boolean
) => {
const browser = new BrowserManagement(
{
Expand All @@ -43,7 +44,10 @@ export const analyzeCookiesUrls = async (
);

await browser.initializeBrowser(true);
const analysisCookieData = await browser.analyzeCookieUrls(urls);
const analysisCookieData = await browser.analyzeCookieUrls(
urls,
shouldSkipAcceptBanner
);

const res = analysisCookieData.map(({ pageUrl, cookieData }) => {
Object.entries(cookieData).forEach(([, frameData]) => {
Expand Down
6 changes: 4 additions & 2 deletions packages/cli/src/procedures/analyzeCookieUrlsInBatches.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ export const analyzeCookiesUrlsInBatches = async (
id: string,
{ text, indent }: { text: string; indent: number }
) => void;
}
},
shouldSkipAcceptBanner = false
) => {
let report: {
pageUrl: string;
Expand Down Expand Up @@ -66,7 +67,8 @@ export const analyzeCookiesUrlsInBatches = async (
urlsWindow,
isHeadless,
delayTime,
cookieDictionary
cookieDictionary,
shouldSkipAcceptBanner
);

report = [...report, ...cookieAnalysis];
Expand Down
84 changes: 76 additions & 8 deletions packages/cli/src/utils/browserManagement/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@
this.shouldLogDebug = shouldLogDebug;
}

debugLog(msg: any) {

Check warning on line 57 in packages/cli/src/utils/browserManagement/index.ts

View workflow job for this annotation

GitHub Actions / Lint

Unexpected any. Specify a different type
if (this.shouldLogDebug) {
console.log(msg);
}
Expand All @@ -75,6 +75,52 @@
this.debugLog('browser intialized');
}

/**
 * Looks for a consent (GDPR/cookie) banner on the page registered for `url`
 * and clicks the first "accept"-style button found inside it.
 *
 * A banner candidate is a `<div>` that is a direct child of `<body>` and
 * whose lower-cased text matches one of the consent-related keywords. Inside
 * each candidate, the first `<button>` whose text contains "accept", "allow"
 * or "agree" is considered the accept button; only the first such button
 * across all candidates is clicked.
 *
 * The lookup is best effort: a failure while evaluating in the page is
 * logged through `debugLog` and does not reject. The method always waits
 * half of `pageWaitTime` before resolving.
 *
 * @param url Key under which the page was stored in `pageMap`.
 * @throws Error when no page is registered under `url`.
 */
async clickOnAcceptBanner(url: string) {
  const page = this.pageMap.get(url);

  if (!page) {
    throw new Error('no page with the provided id was found');
  }

  try {
    // The callback is serialized and executed inside the page, so it must
    // not reference `this` or any variable from the enclosing scope. The
    // result is sent back as a plain boolean and logged on this side.
    const bannerFound = await page.evaluate(() => {
      const bannerPattern =
        /\b(consent|policy|cookie policy|privacy policy|personalize|preferences)\b/;
      const acceptKeywords = ['accept', 'allow', 'agree'];

      const bannerNodes = Array.from(
        document.querySelector('body')?.children ?? []
      ).filter(
        (node) =>
          node.tagName === 'DIV' &&
          bannerPattern.test((node.textContent ?? '').toLowerCase())
      );

      // First matching button of the first banner that has one.
      let acceptButton: HTMLButtonElement | undefined;
      for (const banner of bannerNodes) {
        acceptButton = Array.from(banner.getElementsByTagName('button')).find(
          (button) => {
            const label = (button.textContent ?? '').toLowerCase();
            return acceptKeywords.some((keyword) => label.includes(keyword));
          }
        );

        if (acceptButton) {
          break;
        }
      }

      acceptButton?.click();

      return bannerNodes.length > 0;
    });

    if (bannerFound) {
      this.debugLog(`found GDPR banner in the page.`);
    }
  } catch (error) {
    // Accepting the banner is optional; keep analysing the page even if the
    // in-page lookup could not run.
    this.debugLog(`could not accept GDPR banner on url:${url} ${String(error)}`);
  }

  // Give the page time to react to the click before the caller continues.
  await delay(this.pageWaitTime / 2);
}

async openPage(): Promise<Page> {
if (!this.browser) {
throw new Error('Browser not intialized');
Expand All @@ -90,33 +136,51 @@
height: 790,
deviceScaleFactor: 1,
});

this.debugLog('Page opened');

return sitePage;
}

async navigateAndScroll(url: string) {
/**
 * Navigates the page registered for `url` to that URL.
 *
 * Navigation is best effort: it is given a 10 second timeout, and any error
 * raised by `goto` is caught and only reported through `debugLog`.
 *
 * @param url Address to load; also the key of the page in `pageMap`.
 * @throws Error when no page is registered under `url`.
 */
async navigateToPage(url: string) {
  const targetPage = this.pageMap.get(url);

  if (!targetPage) {
    throw new Error('no page with the provided id was found');
  }

  const navigationOptions = { timeout: 10000 };

  this.debugLog(`starting navigation to url ${url}`);

  try {
    await targetPage.goto(url, navigationOptions);
    this.debugLog(`done with navigation to url:${url}`);
  } catch (error) {
    // Deliberately not rethrown: the caller proceeds with whatever loaded.
    this.debugLog(
      `navigation did not finish in 10 seconds moving on to scrolling`
    );
  }
}

await delay(this.pageWaitTime / 2);
async pageScroll(url: string) {
const page = this.pageMap.get(url);

await page.evaluate(() => {
window.scrollBy(0, 10000);
});
if (!page) {
throw new Error('no page with the provided id was found');
}

try {
await page.evaluate(() => {
window.scrollBy(0, 10000);
});
} catch (error) {
this.debugLog(`scrolling the page to the end.`);
//ignore
}

await delay(this.pageWaitTime / 2);
this.debugLog(`done navigating and scrolling to url:${url}`);

this.debugLog(`scrolling on url:${url}`);
}

async attachNetworkListenersToPage(pageId: string) {
Expand Down Expand Up @@ -240,7 +304,7 @@
}
const frameIdMapFromTree = new Map();
const frames = page.frames();
const frameCallback = (frame: any) => {

Check warning on line 307 in packages/cli/src/utils/browserManagement/index.ts

View workflow job for this annotation

GitHub Actions / Lint

Unexpected any. Specify a different type
const frameId = frame._id;
const _url = frame.url();
frameIdMapFromTree.set(frameId, _url);
Expand All @@ -254,7 +318,7 @@
return frameIdMapFromTree;
}

async analyzeCookieUrls(urls: string[]) {
async analyzeCookieUrls(urls: string[], shouldSkipAcceptBanner: boolean) {
for (const url of urls) {
const sitePage = await this.openPage();
this.pageMap.set(url, sitePage);
Expand All @@ -264,7 +328,11 @@
// start navigation in parallel
await Promise.all(
urls.map(async (url) => {
await this.navigateAndScroll(url);
await this.navigateToPage(url);
if (shouldSkipAcceptBanner) {
await this.clickOnAcceptBanner(url);
}
await this.pageScroll(url);
})
);

Expand Down
Loading