diff --git a/packages/cli/src/index.ts b/packages/cli/src/index.ts index e45f06ece..1ea8bb008 100644 --- a/packages/cli/src/index.ts +++ b/packages/cli/src/index.ts @@ -68,6 +68,10 @@ program .option( '-d, --out-dir ', 'Directory path where the analysis data will be stored' + ) + .option( + '-ab, --accept-banner', + 'This will accept the GDPR banner if present.' ); program.parse(); @@ -116,6 +120,7 @@ const startDashboardServer = async (dir: string) => { const shouldSkipPrompts = !program.opts().prompts; const shouldSkipTechnologyAnalysis = !program.opts().technology; const outDir = program.opts().outDir; + const shouldSkipAcceptBanner = program.opts().acceptBanner; validateArgs( url, @@ -187,7 +192,8 @@ const startDashboardServer = async (dir: string) => { DELAY_TIME, cookieDictionary, 3, - urlsToProcess.length !== 1 ? spinnies : undefined + urlsToProcess.length !== 1 ? spinnies : undefined, + shouldSkipAcceptBanner ); spinnies.succeed('cookie-spinner', { diff --git a/packages/cli/src/procedures/analyzeCookieUrls.ts b/packages/cli/src/procedures/analyzeCookieUrls.ts index 17a951309..93a8241f4 100644 --- a/packages/cli/src/procedures/analyzeCookieUrls.ts +++ b/packages/cli/src/procedures/analyzeCookieUrls.ts @@ -29,7 +29,8 @@ export const analyzeCookiesUrls = async ( urls: string[], isHeadless: boolean, delayTime: number, - cookieDictionary: CookieDatabase + cookieDictionary: CookieDatabase, + shouldSkipAcceptBanner: boolean ) => { const browser = new BrowserManagement( { @@ -43,7 +44,10 @@ export const analyzeCookiesUrls = async ( ); await browser.initializeBrowser(true); - const analysisCookieData = await browser.analyzeCookieUrls(urls); + const analysisCookieData = await browser.analyzeCookieUrls( + urls, + shouldSkipAcceptBanner + ); const res = analysisCookieData.map(({ pageUrl, cookieData }) => { Object.entries(cookieData).forEach(([, frameData]) => { diff --git a/packages/cli/src/procedures/analyzeCookieUrlsInBatches.ts 
b/packages/cli/src/procedures/analyzeCookieUrlsInBatches.ts
index db27f6f2c..93df0200c 100644
--- a/packages/cli/src/procedures/analyzeCookieUrlsInBatches.ts
+++ b/packages/cli/src/procedures/analyzeCookieUrlsInBatches.ts
@@ -36,7 +36,8 @@ export const analyzeCookiesUrlsInBatches = async (
       id: string,
       { text, indent }: { text: string; indent: number }
     ) => void;
-  }
+  },
+  shouldSkipAcceptBanner = false
 ) => {
   let report: {
     pageUrl: string;
@@ -66,7 +67,8 @@ export const analyzeCookiesUrlsInBatches = async (
       urlsWindow,
       isHeadless,
       delayTime,
-      cookieDictionary
+      cookieDictionary,
+      shouldSkipAcceptBanner
     );
 
     report = [...report, ...cookieAnalysis];
diff --git a/packages/cli/src/utils/browserManagement/index.ts b/packages/cli/src/utils/browserManagement/index.ts
index 134b2a44f..43da2ec88 100644
--- a/packages/cli/src/utils/browserManagement/index.ts
+++ b/packages/cli/src/utils/browserManagement/index.ts
@@ -75,6 +75,67 @@ export class BrowserManagement {
     this.debugLog('browser intialized');
   }
 
+  /**
+   * Makes a best-effort attempt to accept a consent (GDPR) banner on the page
+   * stored for `url`, then waits for half of `pageWaitTime`.
+   *
+   * A banner candidate is a `DIV` that is a direct child of `body` and whose
+   * lower-cased text matches one of a few consent-related keywords. The first
+   * button found inside a candidate whose text contains "accept", "allow" or
+   * "agree" is clicked. Failures inside the page are logged and ignored.
+   *
+   * @param url Key under which the page is stored in `pageMap`.
+   * @throws Error when no page is stored for `url`.
+   */
+  async clickOnAcceptBanner(url: string) {
+    const page = this.pageMap.get(url);
+
+    if (!page) {
+      throw new Error('no page with the provided id was found');
+    }
+
+    try {
+      // The callback is executed inside the page, where `this` is not this
+      // class, so it only reports what it found and the logging happens here.
+      const didFindBanner = await page.evaluate(() => {
+        const bannerRegex =
+          /\b(consent|policy|cookie policy|privacy policy|personalize|preferences)\b/;
+        const acceptWords = ['accept', 'allow', 'agree'];
+
+        const bannerNodes = Array.from(
+          document.querySelector('body')?.children ?? []
+        ).filter(
+          (node) =>
+            node.tagName === 'DIV' &&
+            bannerRegex.test((node.textContent ?? '').toLowerCase())
+        );
+
+        const acceptButton = bannerNodes
+          .map((node) =>
+            Array.from(node.getElementsByTagName('button')).find((button) => {
+              const label = (button.textContent ?? '').toLowerCase();
+
+              return acceptWords.some((word) => label.includes(word));
+            })
+          )
+          .find((button) => button !== undefined);
+
+        acceptButton?.click();
+
+        return bannerNodes.length > 0;
+      });
+
+      if (didFindBanner) {
+        this.debugLog(`found GDPR banner in the page.`);
+      }
+    } catch (error) {
+      this.debugLog(`could not check for a GDPR banner on url:${url}`);
+      //ignore
+    }
+
+    await delay(this.pageWaitTime / 2);
+  }
+
   async openPage(): Promise {
     if (!this.browser) {
       throw new Error('Browser not intialized');
@@ -90,33 +151,66 @@ export class BrowserManagement {
       height: 790,
       deviceScaleFactor: 1,
     });
+    this.debugLog('Page opened');
+
     return sitePage;
   }
 
-  async navigateAndScroll(url: string) {
+  /**
+   * Navigates the page stored for `url` to that URL, waiting at most 10
+   * seconds. When `goto` rejects, the failure is logged and ignored so the
+   * caller can carry on with the next step.
+   *
+   * @param url Key of the page in `pageMap` and the address to load.
+   * @throws Error when no page is stored for `url`.
+   */
+  async navigateToPage(url: string) {
     const page = this.pageMap.get(url);
+
     if (!page) {
       throw new Error('no page with the provided id was found');
     }
+
     this.debugLog(`starting navigation to url ${url}`);
+
     try {
       await page.goto(url, { timeout: 10000 });
+
       this.debugLog(`done with navigation to url:${url}`);
     } catch (error) {
       this.debugLog(
         `navigation did not finish in 10 seconds moving on to scrolling`
       );
       //ignore
     }
+  }
 
-    await delay(this.pageWaitTime / 2);
+  /**
+   * Scrolls the page stored for `url` down by 10000 pixels, then waits for
+   * half of `pageWaitTime`. A failure while scrolling is logged and ignored.
+   *
+   * @param url Key under which the page is stored in `pageMap`.
+   * @throws Error when no page is stored for `url`.
+   */
+  async pageScroll(url: string) {
+    const page = this.pageMap.get(url);
 
-    await page.evaluate(() => {
-      window.scrollBy(0, 10000);
-    });
+    if (!page) {
+      throw new Error('no page with the provided id was found');
+    }
+
+    try {
+      await page.evaluate(() => {
+        window.scrollBy(0, 10000);
+      });
+    } catch (error) {
+      this.debugLog(`could not scroll the page on url:${url}`);
+      //ignore
+    }
 
     await delay(this.pageWaitTime / 2);
-    this.debugLog(`done navigating and scrolling to url:${url}`);
+
+    this.debugLog(`done scrolling on url:${url}`);
   }
 
   async attachNetworkListenersToPage(pageId: string) {
@@ -254,7 +348,7 @@ export class BrowserManagement {
     return frameIdMapFromTree;
   }
 
-  async analyzeCookieUrls(urls: string[]) {
+  async analyzeCookieUrls(urls: string[], shouldSkipAcceptBanner: boolean) {
     for (const url of urls) {
       const sitePage = await this.openPage();
       this.pageMap.set(url, sitePage);
@@ -264,7 +358,11 @@ export class BrowserManagement {
     //
start navigation in parallel await Promise.all( urls.map(async (url) => { - await this.navigateAndScroll(url); + await this.navigateToPage(url); + if (shouldSkipAcceptBanner) { + await this.clickOnAcceptBanner(url); + } + await this.pageScroll(url); }) );