From 28e64410973184fa7fa2f0993c059bde690142b6 Mon Sep 17 00:00:00 2001 From: Amoghavarsha Kudaligi Date: Fri, 29 Dec 2023 12:46:15 +0530 Subject: [PATCH 1/9] Feat: Click on accept all cookies button from the cli. --- .../cli/src/utils/browserManagement/index.ts | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/packages/cli/src/utils/browserManagement/index.ts b/packages/cli/src/utils/browserManagement/index.ts index c7bdcc5fb..0e87aca29 100644 --- a/packages/cli/src/utils/browserManagement/index.ts +++ b/packages/cli/src/utils/browserManagement/index.ts @@ -94,6 +94,21 @@ export class BrowserManagement { this.debugLog('3p cookies blocked'); } + async clickOnAcceptBanner(sitePage: Page) { + const acceptAllCookiesBanner = await sitePage.$( + 'button[id="onetrust-accept-btn-handler"]' + ); + + if (acceptAllCookiesBanner) { + await acceptAllCookiesBanner.evaluate((button) => { + button.click(); + }); + this.debugLog('Found and accepted all cookies in GDPR banner'); + } else { + this.debugLog('Couldnt find accept GDPR banner'); + } + } + async openPage(): Promise { if (!this.browser) { throw new Error('Browser not intialized'); @@ -105,6 +120,7 @@ export class BrowserManagement { deviceScaleFactor: 1, }); this.debugLog('Page opened'); + return sitePage; } @@ -122,7 +138,7 @@ export class BrowserManagement { ); //ignore } - + await this.clickOnAcceptBanner(page); await delay(this.pageWaitTime / 2); await page.evaluate(() => { From df0f5843f44b0f3481f486b3222005215d97df7a Mon Sep 17 00:00:00 2001 From: Amoghavarsha Kudaligi Date: Fri, 29 Dec 2023 14:45:07 +0530 Subject: [PATCH 2/9] Incorporate trustarc consent management system. --- packages/cli/src/utils/browserManagement/index.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/cli/src/utils/browserManagement/index.ts b/packages/cli/src/utils/browserManagement/index.ts index 0e87aca29..c95e33416 100644 --- a/packages/cli/src/utils/browserManagement/index.ts +++ b/packages/cli/src/utils/browserManagement/index.ts @@ -96,7 +96,7 @@ export class BrowserManagement { async clickOnAcceptBanner(sitePage: Page) { const acceptAllCookiesBanner = await sitePage.$( - 'button[id="onetrust-accept-btn-handler"]' + 'button[id="onetrust-accept-btn-handler"], button[id="truste-consent-button"]' ); if (acceptAllCookiesBanner) { From 851e75295ffb6454e5ca699a07cad70b38a2ee8b Mon Sep 17 00:00:00 2001 From: Amoghavarsha Kudaligi Date: Fri, 29 Dec 2023 15:28:55 +0530 Subject: [PATCH 3/9] Prettify code to handle multiple consent managers. --- .../cli/src/utils/browserManagement/index.ts | 29 +++++++++++++++---- 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/packages/cli/src/utils/browserManagement/index.ts b/packages/cli/src/utils/browserManagement/index.ts index c95e33416..c049ee423 100644 --- a/packages/cli/src/utils/browserManagement/index.ts +++ b/packages/cli/src/utils/browserManagement/index.ts @@ -95,14 +95,31 @@ export class BrowserManagement { } async clickOnAcceptBanner(sitePage: Page) { - const acceptAllCookiesBanner = await sitePage.$( - 'button[id="onetrust-accept-btn-handler"], button[id="truste-consent-button"]' + const buttonHandlersName = [ + 'onetrust-accept-btn-handler', + 'truste-consent-button', + ]; + + let bannerAccepted = false; + + await Promise.all( + buttonHandlersName.map(async (handler) => { + const acceptAllCookiesBanner = await sitePage.$( + `button[id="${handler}"]` + ); + + if (acceptAllCookiesBanner && !bannerAccepted) { + await acceptAllCookiesBanner.evaluate((button) => { + button?.click(); + }); + bannerAccepted = true; + } + + return handler; + }) ); - if (acceptAllCookiesBanner) { - await acceptAllCookiesBanner.evaluate((button) => { - button.click(); - }); + if (bannerAccepted) { this.debugLog('Found and accepted all cookies in GDPR banner'); } else { this.debugLog('Couldnt find accept GDPR banner'); From 78396543135bd95ce3afb439062a24de1e37bbd7 Mon Sep 17 00:00:00 2001 From: Amoghavarsha Kudaligi Date: Mon, 1 Jan 2024 14:22:49 +0530 Subject: [PATCH 4/9] Make a generic function to accept GDPR banners. --- .../cli/src/utils/browserManagement/index.ts | 56 ++++++++++--------- 1 file changed, 29 insertions(+), 27 deletions(-) diff --git a/packages/cli/src/utils/browserManagement/index.ts b/packages/cli/src/utils/browserManagement/index.ts index c049ee423..e5247e229 100644 --- a/packages/cli/src/utils/browserManagement/index.ts +++ b/packages/cli/src/utils/browserManagement/index.ts @@ -95,35 +95,37 @@ export class BrowserManagement { } async clickOnAcceptBanner(sitePage: Page) { - const buttonHandlersName = [ - 'onetrust-accept-btn-handler', - 'truste-consent-button', - ]; - - let bannerAccepted = false; - - await Promise.all( - buttonHandlersName.map(async (handler) => { - const acceptAllCookiesBanner = await sitePage.$( - `button[id="${handler}"]` - ); + await sitePage.evaluate(() => { + const bannerNodes: Element[] = Array.from( + (document.querySelector('body')?.childNodes || []) as Element[] + ) + .filter((node: Element) => node && node?.tagName === 'DIV') + .filter((node) => { + if (!node || !node?.textContent) { + return false; + } + const regex = /\b(consent|policy|cookie policy|privacy policy)\b/; + return regex.test(node?.textContent.toLowerCase()); + }); - if (acceptAllCookiesBanner && !bannerAccepted) { - await acceptAllCookiesBanner.evaluate((button) => { - button?.click(); - }); - bannerAccepted = true; + const buttonToClick: HTMLButtonElement[] = bannerNodes.map( + (node: Element) => { + const buttonNodes = Array.from( + node.getElementsByTagName('button') || + node.getElementsByTagName('a') + ); + const isButtonForAccept = buttonNodes.filter( + (cnode) => + cnode.textContent && + (cnode.textContent.toLowerCase().includes('accept') || + cnode.textContent.toLowerCase().includes('allow')) + ); + + return isButtonForAccept[0]; } - - return handler; - }) - ); - - if (bannerAccepted) { - this.debugLog('Found and accepted all cookies in GDPR banner'); - } else { - this.debugLog('Couldnt find accept GDPR banner'); - } + ); + buttonToClick[0]?.click(); + }); } async openPage(): Promise { From 1697c1b0f0fb06b4f7485aca10f129c1abf49abf Mon Sep 17 00:00:00 2001 From: Amoghavarsha Kudaligi Date: Mon, 1 Jan 2024 14:27:33 +0530 Subject: [PATCH 5/9] Revert edge case for anchor button. --- packages/cli/src/utils/browserManagement/index.ts | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/packages/cli/src/utils/browserManagement/index.ts b/packages/cli/src/utils/browserManagement/index.ts index e5247e229..07ab2b547 100644 --- a/packages/cli/src/utils/browserManagement/index.ts +++ b/packages/cli/src/utils/browserManagement/index.ts @@ -110,10 +110,7 @@ export class BrowserManagement { const buttonToClick: HTMLButtonElement[] = bannerNodes.map( (node: Element) => { - const buttonNodes = Array.from( - node.getElementsByTagName('button') || - node.getElementsByTagName('a') - ); + const buttonNodes = Array.from(node.getElementsByTagName('button')); const isButtonForAccept = buttonNodes.filter( (cnode) => cnode.textContent && From a503f63face4f085203e0522e63216e55161993a Mon Sep 17 00:00:00 2001 From: Amoghavarsha Kudaligi Date: Mon, 1 Jan 2024 15:08:22 +0530 Subject: [PATCH 6/9] Chore: Remove undefined button from the array. Add preferences and personalize in the regex --- packages/cli/src/utils/browserManagement/index.ts | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/packages/cli/src/utils/browserManagement/index.ts b/packages/cli/src/utils/browserManagement/index.ts index 07ab2b547..4177a58e0 100644 --- a/packages/cli/src/utils/browserManagement/index.ts +++ b/packages/cli/src/utils/browserManagement/index.ts @@ -104,12 +104,13 @@ export class BrowserManagement { if (!node || !node?.textContent) { return false; } - const regex = /\b(consent|policy|cookie policy|privacy policy)\b/; + const regex = + /\b(consent|policy|cookie policy|privacy policy|personalize|preferences)\b/; return regex.test(node?.textContent.toLowerCase()); }); - const buttonToClick: HTMLButtonElement[] = bannerNodes.map( - (node: Element) => { + const buttonToClick: HTMLButtonElement[] = bannerNodes + .map((node: Element) => { const buttonNodes = Array.from(node.getElementsByTagName('button')); const isButtonForAccept = buttonNodes.filter( (cnode) => @@ -119,8 +120,8 @@ export class BrowserManagement { ); return isButtonForAccept[0]; - } - ); + }) + .filter((button) => button); buttonToClick[0]?.click(); }); } From 6dd3fcf693c676bd8a68e591baae134ceb9e6624 Mon Sep 17 00:00:00 2001 From: Amoghavarsha Kudaligi Date: Mon, 11 Mar 2024 13:56:07 +0530 Subject: [PATCH 7/9] Fix logic for euro news group. --- .../cli/src/utils/browserManagement/index.ts | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/packages/cli/src/utils/browserManagement/index.ts b/packages/cli/src/utils/browserManagement/index.ts index d17e33850..6a16cc654 100644 --- a/packages/cli/src/utils/browserManagement/index.ts +++ b/packages/cli/src/utils/browserManagement/index.ts @@ -75,8 +75,14 @@ export class BrowserManagement { this.debugLog('browser intialized'); } - async clickOnAcceptBanner(sitePage: Page) { - await sitePage.evaluate(() => { + async clickOnAcceptBanner(url: string) { + const page = this.pageMap.get(url); + + if (!page) { + throw new Error('no page with the provided id was found'); + } + + await page.evaluate(() => { const bannerNodes: Element[] = Array.from( (document.querySelector('body')?.childNodes || []) as Element[] ) @@ -87,7 +93,8 @@ export class BrowserManagement { } const regex = /\b(consent|policy|cookie policy|privacy policy|personalize|preferences)\b/; - return regex.test(node?.textContent.toLowerCase()); + + return regex.test(node.textContent.toLowerCase()); }); const buttonToClick: HTMLButtonElement[] = bannerNodes @@ -97,7 +104,8 @@ export class BrowserManagement { (cnode) => cnode.textContent && (cnode.textContent.toLowerCase().includes('accept') || - cnode.textContent.toLowerCase().includes('allow')) + cnode.textContent.toLowerCase().includes('allow') || + cnode.textContent.toLowerCase().includes('agree')) ); return isButtonForAccept[0]; @@ -141,7 +149,6 @@ export class BrowserManagement { ); //ignore } - await this.clickOnAcceptBanner(page); await delay(this.pageWaitTime / 2); await page.evaluate(() => { @@ -298,6 +305,7 @@ export class BrowserManagement { await Promise.all( urls.map(async (url) => { await this.navigateAndScroll(url); + await this.clickOnAcceptBanner(url); }) ); From 6ecc78f3862d09b60d91d42326aced588e750dbc Mon Sep 17 00:00:00 2001 From: Amoghavarsha Kudaligi Date: Fri, 22 Mar 2024 14:23:47 +0530 Subject: [PATCH 8/9] Refactor routine for cli. --- .../cli/src/utils/browserManagement/index.ts | 39 +++++++++++++++---- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/packages/cli/src/utils/browserManagement/index.ts b/packages/cli/src/utils/browserManagement/index.ts index 6a16cc654..0036f0d78 100644 --- a/packages/cli/src/utils/browserManagement/index.ts +++ b/packages/cli/src/utils/browserManagement/index.ts @@ -97,6 +97,10 @@ export class BrowserManagement { return regex.test(node.textContent.toLowerCase()); }); + if (bannerNodes.length > 0) { + this.debugLog(`found GDPR banner in the page.`); + } + const buttonToClick: HTMLButtonElement[] = bannerNodes .map((node: Element) => { const buttonNodes = Array.from(node.getElementsByTagName('button')); @@ -113,6 +117,8 @@ export class BrowserManagement { .filter((button) => button); buttonToClick[0]?.click(); }); + + await delay(this.pageWaitTime / 2); } async openPage(): Promise { @@ -130,33 +136,51 @@ export class BrowserManagement { height: 790, deviceScaleFactor: 1, }); + this.debugLog('Page opened'); return sitePage; } - async navigateAndScroll(url: string) { + async navigateToPage(url: string) { const page = this.pageMap.get(url); + if (!page) { throw new Error('no page with the provided id was found'); } + this.debugLog(`starting navigation to url ${url}`); + try { await page.goto(url, { timeout: 10000 }); + this.debugLog(`done with navigation to url:${url}`); } catch (error) { this.debugLog( `navigation did not finish in 10 seconds moving on to scrolling` ); //ignore } - await delay(this.pageWaitTime / 2); + } - await page.evaluate(() => { - window.scrollBy(0, 10000); - }); + async pageScroll(url: string) { + const page = this.pageMap.get(url); + + if (!page) { + throw new Error('no page with the provided id was found'); + } + + try { + await page.evaluate(() => { + window.scrollBy(0, 10000); + }); + } catch (error) { + this.debugLog(`scrolling the page to the end.`); + //ignore + } await delay(this.pageWaitTime / 2); - this.debugLog(`done navigating and scrolling to url:${url}`); + + this.debugLog(`scrolling on url:${url}`); } async attachNetworkListenersToPage(pageId: string) { @@ -304,8 +328,9 @@ export class BrowserManagement { // start navigation in parallel await Promise.all( urls.map(async (url) => { - await this.navigateAndScroll(url); + await this.navigateToPage(url); await this.clickOnAcceptBanner(url); + await this.pageScroll(url); }) ); From 90f4e4ba55d85deab55e3cdeac2fe615837d4703 Mon Sep 17 00:00:00 2001 From: Amoghavarsha Kudaligi Date: Fri, 22 Mar 2024 14:55:53 +0530 Subject: [PATCH 9/9] Should accept banner if option has been passed. --- packages/cli/src/index.ts | 8 +++++++- packages/cli/src/procedures/analyzeCookieUrls.ts | 8 ++++++-- packages/cli/src/procedures/analyzeCookieUrlsInBatches.ts | 6 ++++-- packages/cli/src/utils/browserManagement/index.ts | 6 ++++-- 4 files changed, 21 insertions(+), 7 deletions(-) diff --git a/packages/cli/src/index.ts b/packages/cli/src/index.ts index e45f06ece..1ea8bb008 100644 --- a/packages/cli/src/index.ts +++ b/packages/cli/src/index.ts @@ -68,6 +68,10 @@ program .option( '-d, --out-dir ', 'Directory path where the analysis data will be stored' + ) + .option( + '-ab, --accept-banner', + 'This will accept the GDPR banner if present.' ); program.parse(); @@ -116,6 +120,7 @@ const startDashboardServer = async (dir: string) => { const shouldSkipPrompts = !program.opts().prompts; const shouldSkipTechnologyAnalysis = !program.opts().technology; const outDir = program.opts().outDir; + const shouldSkipAcceptBanner = program.opts().acceptBanner; validateArgs( url, @@ -187,7 +192,8 @@ const startDashboardServer = async (dir: string) => { DELAY_TIME, cookieDictionary, 3, - urlsToProcess.length !== 1 ? spinnies : undefined + urlsToProcess.length !== 1 ? spinnies : undefined, + shouldSkipAcceptBanner ); spinnies.succeed('cookie-spinner', { diff --git a/packages/cli/src/procedures/analyzeCookieUrls.ts b/packages/cli/src/procedures/analyzeCookieUrls.ts index 17a951309..93a8241f4 100644 --- a/packages/cli/src/procedures/analyzeCookieUrls.ts +++ b/packages/cli/src/procedures/analyzeCookieUrls.ts @@ -29,7 +29,8 @@ export const analyzeCookiesUrls = async ( urls: string[], isHeadless: boolean, delayTime: number, - cookieDictionary: CookieDatabase + cookieDictionary: CookieDatabase, + shouldSkipAcceptBanner: boolean ) => { const browser = new BrowserManagement( { @@ -43,7 +44,10 @@ export const analyzeCookiesUrls = async ( ); await browser.initializeBrowser(true); - const analysisCookieData = await browser.analyzeCookieUrls(urls); + const analysisCookieData = await browser.analyzeCookieUrls( + urls, + shouldSkipAcceptBanner + ); const res = analysisCookieData.map(({ pageUrl, cookieData }) => { Object.entries(cookieData).forEach(([, frameData]) => { diff --git a/packages/cli/src/procedures/analyzeCookieUrlsInBatches.ts b/packages/cli/src/procedures/analyzeCookieUrlsInBatches.ts index db27f6f2c..93df0200c 100644 --- a/packages/cli/src/procedures/analyzeCookieUrlsInBatches.ts +++ b/packages/cli/src/procedures/analyzeCookieUrlsInBatches.ts @@ -36,7 +36,8 @@ export const analyzeCookiesUrlsInBatches = async ( id: string, { text, indent }: { text: string; indent: number } ) => void; - } + }, + shouldSkipAcceptBanner = false ) => { let report: { pageUrl: string; @@ -66,7 +67,8 @@ export const analyzeCookiesUrlsInBatches = async ( urlsWindow, isHeadless, delayTime, - cookieDictionary + cookieDictionary, + shouldSkipAcceptBanner ); report = [...report, ...cookieAnalysis]; diff --git a/packages/cli/src/utils/browserManagement/index.ts b/packages/cli/src/utils/browserManagement/index.ts index 0036f0d78..43da2ec88 100644 --- a/packages/cli/src/utils/browserManagement/index.ts +++ b/packages/cli/src/utils/browserManagement/index.ts @@ -318,7 +318,7 @@ export class BrowserManagement { return frameIdMapFromTree; } - async analyzeCookieUrls(urls: string[]) { + async analyzeCookieUrls(urls: string[], shouldSkipAcceptBanner: boolean) { for (const url of urls) { const sitePage = await this.openPage(); this.pageMap.set(url, sitePage); @@ -329,7 +329,9 @@ export class BrowserManagement { await Promise.all( urls.map(async (url) => { await this.navigateToPage(url); - await this.clickOnAcceptBanner(url); + if (shouldSkipAcceptBanner) { + await this.clickOnAcceptBanner(url); + } await this.pageScroll(url); }) );