diff --git a/packages/analysis-utils/src/browserManagement/index.ts b/packages/analysis-utils/src/browserManagement/index.ts index a0adeaa33..08ae4dab7 100644 --- a/packages/analysis-utils/src/browserManagement/index.ts +++ b/packages/analysis-utils/src/browserManagement/index.ts @@ -24,6 +24,7 @@ import { type ScriptTagUnderCheck, type LibraryData, type LibraryMatchers, + type SingleURLError, resolveWithTimeout, delay, RESPONSE_EVENT, @@ -49,12 +50,14 @@ export class BrowserManagement { isHeadless: boolean; pageWaitTime: number; pages: Record; + erroredOutUrls: Record; pageFrames: Record>; pageResponses: Record>; pageRequests: Record>; pageResourcesMaps: Record>; shouldLogDebug: boolean; spinnies: Spinnies | undefined; + isSiteMap: boolean; indent = 0; constructor( viewportConfig: ViewportConfig, @@ -62,12 +65,14 @@ export class BrowserManagement { pageWaitTime: number, shouldLogDebug: boolean, indent: number, + isSiteMap: boolean, spinnies?: Spinnies ) { this.viewportConfig = viewportConfig; this.browser = null; this.isHeadless = isHeadless; this.pageWaitTime = pageWaitTime; + this.isSiteMap = isSiteMap; this.pages = {}; this.pageFrames = {}; this.pageResponses = {}; @@ -76,14 +81,15 @@ export class BrowserManagement { this.pageResourcesMaps = {}; this.spinnies = spinnies; this.indent = indent; + this.erroredOutUrls = {}; } - debugLog(msg: any) { + debugLog(msg: string, shouldShowWarning?: boolean) { if (this.shouldLogDebug && this.spinnies) { this.spinnies.add(msg, { text: msg, - //@ts-ignore - succeedColor: 'white', + succeedColor: shouldShowWarning ? 'yellowBright' : 'white', + spinnerColor: shouldShowWarning ? 
'yellowBright' : 'white', status: 'non-spinnable', indent: this.indent, }); @@ -109,46 +115,58 @@ export class BrowserManagement { } async clickOnAcceptBanner(url: string) { - const page = this.pages[url]; + try { + const page = this.pages[url]; - if (!page) { - throw new Error('No page with the provided id was found'); - } + if (!page) { + throw new Error('No page with the provided id was found'); + } - await page.evaluate(() => { - const bannerNodes: Element[] = Array.from( - (document.querySelector('body')?.childNodes || []) as Element[] - ) - .filter((node: Element) => node && node?.tagName === 'DIV') - .filter((node) => { - if (!node || !node?.textContent) { - return false; - } - const regex = - /\b(consent|policy|cookie policy|privacy policy|personalize|preferences)\b/; + await page.evaluate(() => { + const bannerNodes: Element[] = Array.from( + (document.querySelector('body')?.childNodes || []) as Element[] + ) + .filter((node: Element) => node && node?.tagName === 'DIV') + .filter((node) => { + if (!node || !node?.textContent) { + return false; + } + const regex = + /\b(consent|policy|cookie policy|privacy policy|personalize|preferences)\b/; - return regex.test(node.textContent.toLowerCase()); - }); + return regex.test(node.textContent.toLowerCase()); + }); - const buttonToClick: HTMLButtonElement[] = bannerNodes - .map((node: Element) => { - const buttonNodes = Array.from(node.getElementsByTagName('button')); - const isButtonForAccept = buttonNodes.filter( - (cnode) => - cnode.textContent && - (cnode.textContent.toLowerCase().includes('accept') || - cnode.textContent.toLowerCase().includes('allow') || - cnode.textContent.toLowerCase().includes('ok') || - cnode.textContent.toLowerCase().includes('agree')) - ); + const buttonToClick: HTMLButtonElement[] = bannerNodes + .map((node: Element) => { + const buttonNodes = Array.from(node.getElementsByTagName('button')); + const isButtonForAccept = buttonNodes.filter( + (cnode) => + cnode.textContent && + 
(cnode.textContent.toLowerCase().includes('accept') || + cnode.textContent.toLowerCase().includes('allow') || + cnode.textContent.toLowerCase().includes('ok') || + cnode.textContent.toLowerCase().includes('agree')) + ); - return isButtonForAccept[0]; - }) - .filter((button) => button); - buttonToClick[0]?.click(); - }); + return isButtonForAccept[0]; + }) + .filter((button) => button); + buttonToClick[0]?.click(); + }); - await delay(this.pageWaitTime / 2); + await delay(this.pageWaitTime / 2); + } catch (error) { + if (error instanceof Error) { + this.pushErrors(url, { + errorMessage: error.message, + stackTrace: error?.stack ?? '', + errorName: error?.name, + }); + + throw error; + } + } } async openPage(): Promise { @@ -172,6 +190,14 @@ export class BrowserManagement { return sitePage; } + pushErrors(url: string, objectToPushed: SingleURLError) { + if (!this.erroredOutUrls[url]) { + this.erroredOutUrls[url] = []; + } + + this.erroredOutUrls[url].push(objectToPushed); + } + async navigateToPage(url: string) { const page = this.pages[url]; @@ -182,13 +208,43 @@ export class BrowserManagement { this.debugLog(`Starting navigation to URL: ${url}`); try { - await page.goto(url, { timeout: 10000 }); + const response = await page.goto(url, { + timeout: 10000, + }); + + const SUCCESS_RESPONSE = 200; + + if (response && response.status() !== SUCCESS_RESPONSE) { + this.pushErrors(url, { + errorMessage: `Invalid server response: ${response.status()}`, + errorCode: `${response.status()}`, + errorName: `INVALID_SERVER_RESPONSE`, + }); + + this.debugLog(`Warning: Server error found in URL: ${url}`, true); + + if (!this.isSiteMap) { + throw new Error(`Invalid server response: ${response.status()}`); + } + } + this.debugLog(`Navigation completed to URL: ${url}`); } catch (error) { - this.debugLog( - `Navigation did not finish in 10 seconds moving on to scrolling` - ); - //ignore + if (error instanceof Error) { + this.pushErrors(url, { + errorMessage: error.message, + stackTrace: 
error?.stack ?? '', + errorName: error?.name, + }); + + if (error?.name === 'TimeoutError') { + this.debugLog( + `Navigation did not finish on URL ${url} in 10 seconds moving on to scrolling` + ); + } + + throw error; + } } } @@ -609,40 +665,58 @@ export class BrowserManagement { url: string, Libraries: LibraryMatchers[] ) { - const page = this.pages[url]; - - if (!page) { - throw new Error('No page with the provided ID was found'); - } + try { + const page = this.pages[url]; - const domQueryMatches: LibraryData = {}; + if (!page) { + throw new Error('No page with the provided ID was found'); + } - await Promise.all( - Libraries.map(async ({ domQueryFunction, name }) => { - if (domQueryFunction && name) { - await page.addScriptTag({ - content: `window.${name.replaceAll('-', '')} = ${domQueryFunction}`, - }); + const domQueryMatches: LibraryData = {}; - const queryResult = await page.evaluate((library: string) => { - //@ts-ignore - const functionDOMQuery = window[`${library}`]; + await Promise.all( + Libraries.map(async ({ domQueryFunction, name }) => { + if (domQueryFunction && name) { + await page.addScriptTag({ + content: `window.${name.replaceAll( + '-', + '' + )} = ${domQueryFunction}`, + }); + + const queryResult = await page.evaluate((library: string) => { + //@ts-ignore + const functionDOMQuery = window[`${library}`]; + + if (!functionDOMQuery) { + return []; + } + + return functionDOMQuery(); + }, name.replaceAll('-', '')); + + domQueryMatches[name] = { + domQuerymatches: queryResult as [string], + }; + } + }) + ); - if (!functionDOMQuery) { - return []; - } + const mainFrameUrl = new URL(page.url()).origin; - return functionDOMQuery(); - }, name.replaceAll('-', '')); + return { [mainFrameUrl]: domQueryMatches }; + } catch (error) { + if (error instanceof Error) { + this.pushErrors(url, { + errorMessage: error.message, + stackTrace: error?.stack ?? 
'', + errorName: error?.name, + }); - domQueryMatches[name] = { - domQuerymatches: queryResult as [string], - }; - } - }) - ); - const mainFrameUrl = new URL(page.url()).origin; - return { [mainFrameUrl]: domQueryMatches }; + throw error; + } + return {}; + } } async analyzeCookies( @@ -661,11 +735,18 @@ export class BrowserManagement { ); // Navigate to URLs - await Promise.all( - userProvidedUrls.map(async (url) => { - await this.navigateToPage(url); - }) - ); + // eslint-disable-next-line no-useless-catch -- Because we are rethrowing the same error no need to create a new Error instance + try { + await Promise.all( + userProvidedUrls.map(async (url) => { + await this.navigateToPage(url); + }) + ); + } catch (error) { + if (!this.isSiteMap) { + throw error; + } + } // Delay for page to load resources await delay(this.pageWaitTime / 2); @@ -673,12 +754,17 @@ export class BrowserManagement { // Accept Banners if (!shouldSkipAcceptBanner) { // delay - - await Promise.all( - userProvidedUrls.map(async (url) => { - await this.clickOnAcceptBanner(url); - }) - ); + try { + await Promise.all( + userProvidedUrls.map(async (url) => { + await this.clickOnAcceptBanner(url); + }) + ); + } catch (error) { + if (!this.isSiteMap) { + throw error; + } + } } // Scroll to bottom of the page @@ -688,19 +774,25 @@ export class BrowserManagement { }) ); - await Promise.all( - userProvidedUrls.map(async (url) => { - const newMatches = await this.insertAndRunDOMQueryFunctions( - url, - Libraries - ); + try { + await Promise.all( + userProvidedUrls.map(async (url) => { + const newMatches = await this.insertAndRunDOMQueryFunctions( + url, + Libraries + ); - consolidatedDOMQueryMatches = { - ...consolidatedDOMQueryMatches, - ...newMatches, - }; - }) - ); + consolidatedDOMQueryMatches = { + ...consolidatedDOMQueryMatches, + ...newMatches, + }; + }) + ); + } catch (error) { + if (!this.isSiteMap) { + throw error; + } + } // Delay for page to load more resources await delay(this.pageWaitTime 
/ 2); @@ -755,7 +847,11 @@ export class BrowserManagement { }) ); - return { result, consolidatedDOMQueryMatches }; + return { + result, + consolidatedDOMQueryMatches, + erroredOutUrls: this.erroredOutUrls, + }; } async deinitialize() { diff --git a/packages/analysis-utils/src/procedures/analyzeCookiesUrlsAndFetchResources.ts b/packages/analysis-utils/src/procedures/analyzeCookiesUrlsAndFetchResources.ts index 74dc8d3c2..1878499e7 100644 --- a/packages/analysis-utils/src/procedures/analyzeCookiesUrlsAndFetchResources.ts +++ b/packages/analysis-utils/src/procedures/analyzeCookiesUrlsAndFetchResources.ts @@ -38,63 +38,75 @@ export const analyzeCookiesUrlsAndFetchResources = async ( cookieDictionary: CookieDatabase, shouldSkipAcceptBanner: boolean, verbose: boolean, + isSitemap: boolean, spinnies?: Spinnies, indent = 4 ) => { - const browser = new BrowserManagement( - { - width: 1440, - height: 790, - deviceScaleFactor: 1, - }, - isHeadless, - delayTime, - verbose, - indent, - spinnies - ); + // eslint-disable-next-line no-useless-catch -- Because we are rethrowing the same error no need to create a new Error instance + try { + const browser = new BrowserManagement( + { + width: 1440, + height: 790, + deviceScaleFactor: 1, + }, + isHeadless, + delayTime, + verbose, + indent, + isSitemap, + spinnies + ); - await browser.initializeBrowser(true); - const { result: analysisCookieData, consolidatedDOMQueryMatches } = - await browser.analyzeCookies(urls, shouldSkipAcceptBanner, Libraries); + await browser.initializeBrowser(true); - const resources = browser.getResources(urls); + const { + result: analysisCookieData, + consolidatedDOMQueryMatches, + erroredOutUrls, + } = await browser.analyzeCookies(urls, shouldSkipAcceptBanner, Libraries); - const res = analysisCookieData.map(({ url: pageUrl, cookieData }) => { - Object.entries(cookieData).forEach(([, frameData]) => { - const frameCookies = frameData.frameCookies; - Object.entries(frameCookies).forEach(([key, cookie]) => { 
- const analytics = findAnalyticsMatch( - cookie.parsedCookie.name, - cookieDictionary - ); + const resources = browser.getResources(urls); - frameCookies[key.trim()].analytics = { - platform: analytics?.platform || 'Unknown', - category: analytics?.category || 'Uncategorized', - gdprUrl: analytics?.gdprUrl || '', - description: analytics?.description, - }; + const res = analysisCookieData.map(({ url: pageUrl, cookieData }) => { + Object.entries(cookieData).forEach(([, frameData]) => { + const frameCookies = frameData.frameCookies; + Object.entries(frameCookies).forEach(([key, cookie]) => { + const analytics = findAnalyticsMatch( + cookie.parsedCookie.name, + cookieDictionary + ); - frameCookies[key.trim()].isFirstParty = isFirstParty( - cookie.parsedCookie.domain, - pageUrl - ); + frameCookies[key.trim()].analytics = { + platform: analytics?.platform || 'Unknown', + category: analytics?.category || 'Uncategorized', + gdprUrl: analytics?.gdprUrl || '', + description: analytics?.description, + }; - frameCookies[key.trim()].blockingStatus = deriveBlockingStatus( - cookie.networkEvents - ); + frameCookies[key.trim()].isFirstParty = isFirstParty( + cookie.parsedCookie.domain, + pageUrl + ); + + frameCookies[key.trim()].blockingStatus = deriveBlockingStatus( + cookie.networkEvents + ); + }); }); - }); - return { - url: pageUrl, - cookieData, - resources: resources[pageUrl], - domQueryMatches: consolidatedDOMQueryMatches[pageUrl], - }; - }); + return { + url: pageUrl, + cookieData, + resources: resources[pageUrl], + erroredOutUrls, + domQueryMatches: consolidatedDOMQueryMatches[pageUrl], + }; + }); - await browser.deinitialize(); - return res; + await browser.deinitialize(); + return res; + } catch (error) { + throw error; + } }; diff --git a/packages/analysis-utils/src/procedures/analyzeCookiesUrlsInBatchesAndFetchResources.ts b/packages/analysis-utils/src/procedures/analyzeCookiesUrlsInBatchesAndFetchResources.ts index 8d4d0f034..7dbcd2fba 100644 --- 
a/packages/analysis-utils/src/procedures/analyzeCookiesUrlsInBatchesAndFetchResources.ts +++ b/packages/analysis-utils/src/procedures/analyzeCookiesUrlsInBatchesAndFetchResources.ts @@ -23,6 +23,7 @@ import { LibraryData, type LibraryMatchers, removeAndAddNewSpinnerText, + type SingleURLError, } from '@google-psat/common'; /** @@ -42,59 +43,66 @@ export const analyzeCookiesUrlsInBatchesAndFetchResources = async ( verbose = false, indent = 4 ) => { - let report: { - url: string; - cookieData: { - [frameUrl: string]: { - frameCookies: { - [key: string]: CookieData; + // eslint-disable-next-line no-useless-catch -- Because we are rethrowing the same error no need to create a new Error instance + try { + let report: { + url: string; + cookieData: { + [frameUrl: string]: { + frameCookies: { + [key: string]: CookieData; + }; }; }; - }; - resources: { - origin: string | null; - content: string; - type?: string; - }[]; - domQueryMatches: LibraryData; - }[] = []; + resources: { + origin: string | null; + content: string; + type?: string; + }[]; + domQueryMatches: LibraryData; + erroredOutUrls: Record; + }[] = []; - for (let i = 0; i < urls.length; i += batchSize) { - const start = i; - const end = Math.min(urls.length - 1, i + batchSize - 1); + for (let i = 0; i < urls.length; i += batchSize) { + const start = i; + const end = Math.min(urls.length - 1, i + batchSize - 1); - spinnies && - indent === 4 && - spinnies.add(`cookie-batch-spinner${start + 1}-${end + 1}`, { - text: `Analyzing cookies in URLs ${start + 1} - ${end + 1}`, - indent, - }); + spinnies && + indent === 4 && + spinnies.add(`cookie-batch-spinner${start + 1}-${end + 1}`, { + text: `Analyzing cookies in URLs ${start + 1} - ${end + 1}`, + indent, + }); - const urlsWindow = urls.slice(start, end + 1); + const urlsWindow = urls.slice(start, end + 1); - const cookieAnalysisAndFetchedResources = - await analyzeCookiesUrlsAndFetchResources( - urlsWindow, - Libraries, - isHeadless, - delayTime, - cookieDictionary, - 
shouldSkipAcceptBanner, - verbose, - spinnies - ); + const cookieAnalysisAndFetchedResources = + await analyzeCookiesUrlsAndFetchResources( + urlsWindow, + Libraries, + isHeadless, + delayTime, + cookieDictionary, + shouldSkipAcceptBanner, + verbose, + urls.length > 1, + spinnies + ); - report = [...report, ...cookieAnalysisAndFetchedResources]; + report = [...report, ...cookieAnalysisAndFetchedResources]; - spinnies && - indent === 4 && - removeAndAddNewSpinnerText( - spinnies, - `cookie-batch-spinner${start + 1}-${end + 1}`, - `Done analyzing cookies in URLs ${start + 1} - ${end + 1}`, - indent - ); - } + spinnies && + indent === 4 && + removeAndAddNewSpinnerText( + spinnies, + `cookie-batch-spinner${start + 1}-${end + 1}`, + `Done analyzing cookies in URLs ${start + 1} - ${end + 1}`, + indent + ); + } - return report; + return report; + } catch (error) { + throw error; + } }; diff --git a/packages/cli-dashboard/src/app.tsx b/packages/cli-dashboard/src/app.tsx index c0cb12fd5..e1f4de1ef 100644 --- a/packages/cli-dashboard/src/app.tsx +++ b/packages/cli-dashboard/src/app.tsx @@ -20,6 +20,7 @@ import React, { useEffect, useState } from 'react'; import type { CompleteJson, CookieFrameStorageType, + ErroredOutUrlsData, LibraryData, TechnologyData, } from '@google-psat/common'; @@ -43,12 +44,20 @@ enum DisplayType { const App = () => { const [cookies, setCookies] = useState({}); + const [landingPageCookies, setLandingPageCookies] = useState({}); + const [technologies, setTechnologies] = useState([]); + const [completeJsonReport, setCompleteJsonReport] = useState< CompleteJson[] | null >(null); + + const [erroredOutUrls, setErroredOutUrls] = useState( + [] + ); + const [libraryMatches, setLibraryMatches] = useState<{ [key: string]: LibraryData; } | null>(null); @@ -110,6 +119,7 @@ const App = () => { _libraryMatches = extractedData.consolidatedLibraryMatches; setLandingPageCookies(extractedData.landingPageCookies); + setErroredOutUrls(extractedData.erroredOutUrlsData); 
} else { _cookies = extractCookies(data[0].cookieData, '', true); _technologies = data[0].technologyData; @@ -124,6 +134,7 @@ const App = () => { if (type === DisplayType.SITEMAP) { return ( { +xdescribe('CLI E2E Test', () => { const cli = require.resolve('../../dist/main.js'); - afterAll(() => { - fs.rmSync(path.join(process.cwd(), '/out/bbc-com'), { recursive: true }); - }); - it('Should run site analysis', () => { return coffee - .fork(cli, ['-u https://bbc.com', '-w 1']) - .includes('stdout', '/out/bbc-com/report_') + .fork(cli, ['-u https://httpstatus.us/200', '-w 1000']) + .includes('stdout', '/out/httpstatus-us-200/report_') .end(); }, 60000); }); diff --git a/packages/cli/src/index.ts b/packages/cli/src/index.ts index 77bfaef6e..0a6108bf8 100644 --- a/packages/cli/src/index.ts +++ b/packages/cli/src/index.ts @@ -24,20 +24,12 @@ import { existsSync } from 'fs-extra'; import Spinnies from 'spinnies'; import path, { basename } from 'path'; import { I18n } from '@google-psat/i18n'; -import { - type CompleteJson, - type LibraryData, - removeAndAddNewSpinnerText, -} from '@google-psat/common'; +import { removeAndAddNewSpinnerText } from '@google-psat/common'; import { analyzeCookiesUrlsInBatchesAndFetchResources, analyzeTechnologiesUrlsInBatches, } from '@google-psat/analysis-utils'; -import { - DetectionFunctions, - LIBRARIES, - detectMatchingSignatures, -} from '@google-psat/library-detection'; +import { LIBRARIES } from '@google-psat/library-detection'; /** * Internal dependencies. 
@@ -53,9 +45,10 @@ import { filePathValidator, urlValidator, numericValidator, + redLogger, + getSiteReport, + saveResultsAsHTML, } from './utils'; -import { redLogger } from './utils/coloredLoggers'; -import saveResultsAsHTML from './utils/saveResultAsHTML'; events.EventEmitter.defaultMaxListeners = 15; @@ -230,20 +223,35 @@ program.parse(); text: 'Analyzing cookies on the first site visit', }); - const cookieAnalysisAndFetchedResourceData = - await analyzeCookiesUrlsInBatchesAndFetchResources( - urlsToProcess, - LIBRARIES, - !isHeadful, - waitTime, - cookieDictionary, - concurrency, - spinnies, - shouldSkipAcceptBanner, - verbose, - sitemapUrl || filePath ? 4 : 3 - ); - + let cookieAnalysisAndFetchedResourceData: any; + + // eslint-disable-next-line no-useless-catch -- Because we are rethrowing the same error no need to create a new Error instance + try { + cookieAnalysisAndFetchedResourceData = + await analyzeCookiesUrlsInBatchesAndFetchResources( + urlsToProcess, + LIBRARIES, + !isHeadful, + waitTime, + cookieDictionary, + concurrency, + spinnies, + shouldSkipAcceptBanner, + verbose, + sitemapUrl || filePath ? 4 : 3 + ); + } catch (error) { + if (urlsToProcess.length === 1) { + removeAndAddNewSpinnerText( + spinnies, + 'cookie-spinner', + 'Failure in analyzing cookies!', + 0, + true + ); + throw error; + } + } removeAndAddNewSpinnerText( spinnies, 'cookie-spinner', @@ -271,23 +279,11 @@ program.parse(); ); } - const result = urlsToProcess.map((_url, ind) => { - const detectedMatchingSignatures: LibraryData = { - ...detectMatchingSignatures( - cookieAnalysisAndFetchedResourceData[ind].resources ?? [], - Object.fromEntries( - LIBRARIES.map((library) => [library.name, library.detectionFunction]) - ) as DetectionFunctions - ), - ...(cookieAnalysisAndFetchedResourceData[ind]?.domQueryMatches ?? {}), - }; - return { - pageUrl: _url, - technologyData: technologyAnalysisData ? 
technologyAnalysisData[ind] : [], - cookieData: cookieAnalysisAndFetchedResourceData[ind].cookieData, - libraryMatches: detectedMatchingSignatures ?? [], - } as unknown as CompleteJson; - }); + const result = getSiteReport( + urlsToProcess, + cookieAnalysisAndFetchedResourceData, + technologyAnalysisData + ); I18n.loadCLIMessagesData(locale); diff --git a/packages/cli/src/utils/getSiteReport.ts b/packages/cli/src/utils/getSiteReport.ts new file mode 100644 index 000000000..23497b471 --- /dev/null +++ b/packages/cli/src/utils/getSiteReport.ts @@ -0,0 +1,114 @@ +/* + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * External dependencies + */ +import { + parseUrl, + type CompleteJson, + type ErroredOutUrlsData, + type SingleURLError, +} from '@google-psat/common'; +import { + type DetectionFunctions, + detectMatchingSignatures, + LIBRARIES, + type LibraryData, +} from '@google-psat/library-detection'; + +/** + * This function returns the exact object which will be sent to the cli dashboard when report is generated. + * @param urls The user provided urls to be processed. + * @param processedData The cookie data along with library detection information with erroredOutUrls. + * @param technologyAnalysisData The technology analysis data from CLI. + * @returns {object} The object which will be used to send the report to the cli dashboard. 
+ */ +function getSiteReport( + urls: string[], + processedData: any, + technologyAnalysisData: any +) { + return urls.map((url, index) => { + const { + erroredOutUrls = {}, + cookieData = {}, + domQueryMatches = {}, + resources = [], + } = processedData?.[index] ?? {}; /* processedData can be undefined (or shorter than urls) when a multi-URL run failed upstream and the caller swallowed the error; fall back to the defaults above instead of throwing */ + + const hasTimeOutError = erroredOutUrls[url]?.some( + ({ errorName }: SingleURLError) => + errorName === 'TimeoutError' || errorName === 'i' /* NOTE(review): 'i' looks like a minified TimeoutError class name - confirm */ + ); + + const detectedMatchingSignatures: LibraryData = { + ...detectMatchingSignatures( + resources ?? [], + Object.fromEntries( + LIBRARIES.map((library) => [library.name, library.detectionFunction]) + ) as DetectionFunctions + ), + ...(domQueryMatches ?? {}), + }; + + if (erroredOutUrls[url] && erroredOutUrls[url].length > 0) { + if (hasTimeOutError) { + return { + pageUrl: parseUrl(url) ? new URL(url).href : encodeURI(url), + technologyData: technologyAnalysisData + ? technologyAnalysisData[index] + : [], + cookieData: cookieData, + libraryMatches: detectedMatchingSignatures ?? [], + erroredOutUrls: [ + ...erroredOutUrls[url].map((errors: SingleURLError) => { + return { + url: parseUrl(url) ? new URL(url).href : encodeURI(url), + ...errors, + }; + }), + ] as ErroredOutUrlsData[], + } as unknown as CompleteJson; + } + + return { + pageUrl: parseUrl(url) ? new URL(url).href : encodeURI(url), + technologyData: [], + cookieData: {}, + libraryMatches: [], + erroredOutUrls: [ + ...erroredOutUrls[url].map((errors: SingleURLError) => { + return { + url: parseUrl(url) ? new URL(url).href : encodeURI(url), /* same normalised form as pageUrl and the timeout branch */ + ...errors, + }; + }), + ] as ErroredOutUrlsData[], + } as unknown as CompleteJson; + } + + return { + pageUrl: parseUrl(url) ? new URL(url).href : encodeURI(url), + technologyData: technologyAnalysisData + ? technologyAnalysisData[index] + : [], + cookieData, + libraryMatches: detectedMatchingSignatures ??
[], + } as unknown as CompleteJson; + }); +} + +export default getSiteReport; diff --git a/packages/cli/src/utils/index.ts b/packages/cli/src/utils/index.ts index 7502f218b..7278c3a73 100644 --- a/packages/cli/src/utils/index.ts +++ b/packages/cli/src/utils/index.ts @@ -22,4 +22,6 @@ export { default as askUserInput } from './askUserInput'; export { default as generatePrefix } from './generatePrefix'; export { default as getOutputFilePath } from './getOutputFilePath'; export { default as saveResultsAsHTML } from './saveResultAsHTML'; +export { default as getSiteReport } from './getSiteReport'; +export * from './coloredLoggers'; export * from './validators'; diff --git a/packages/cli/src/utils/saveReports.ts b/packages/cli/src/utils/saveReports.ts index 0e13694e7..8e0a861fa 100644 --- a/packages/cli/src/utils/saveReports.ts +++ b/packages/cli/src/utils/saveReports.ts @@ -17,7 +17,7 @@ /** * External dependencies. */ -import { type CompleteJson } from '@google-psat/common'; +import { generateErrorLogFile, type CompleteJson } from '@google-psat/common'; import { ensureFile, writeFile } from 'fs-extra'; /** * Internal dependencies. 
@@ -54,6 +54,11 @@ const saveReports = async ( 'report.html', sitemapUrl ); + + const errorLogs = generateErrorLogFile(result); + + await ensureFile(path.join(outDir, 'error_logs.txt')); + await writeFile(path.join(outDir, 'error_logs.txt'), errorLogs); // Sitemap report await Promise.all( result.map(async (siteReport) => { diff --git a/packages/cli/src/utils/saveResultAsHTML.ts b/packages/cli/src/utils/saveResultAsHTML.ts index e2b6ab8cb..d948021ee 100644 --- a/packages/cli/src/utils/saveResultAsHTML.ts +++ b/packages/cli/src/utils/saveResultAsHTML.ts @@ -118,6 +118,17 @@ const saveResultsAsHTML = async ( writeFile(outputFilePath, buffer, () => { if (!fileName) { + if ( + result.some( + (singleResult) => + singleResult.erroredOutUrls && + singleResult.erroredOutUrls.length > 0 + ) + ) { + console.log( + `\nWarning: Some URLs encountered issues while analysing cookies. Please check the dashboard for more details.` + ); + } console.log(`\nReport: ${URL.pathToFileURL(outFileFullDir)}`); } }); diff --git a/packages/common/src/cookies.types.ts b/packages/common/src/cookies.types.ts index be414ac03..57138b867 100644 --- a/packages/common/src/cookies.types.ts +++ b/packages/common/src/cookies.types.ts @@ -149,6 +149,14 @@ export type TechnologyData = { pageUrl?: string; }; +export type ErroredOutUrlsData = { + errorCode?: string; + errorMessage: string; + url: string; + stackTrace?: string; + errorName: string; +}; + export interface TabCookies { [key: string]: CookieTableData; } @@ -234,6 +242,13 @@ export type CookieFrameStorageType = { }; }; +export type SingleURLError = { + errorMessage: string; + errorCode?: string; + stackTrace?: string; + errorName: string; +}; + export type CompleteJson = { pageUrl: string; cookieData: { @@ -244,6 +259,7 @@ export type CompleteJson = { frameType?: string | undefined; }; }; + erroredOutUrls: ErroredOutUrlsData[]; libraryMatches: { [key: string]: LibraryData }; technologyData: TechnologyData[]; }; diff --git 
a/packages/common/src/index.ts b/packages/common/src/index.ts index 9bd59310f..2cfbf0fef 100644 --- a/packages/common/src/index.ts +++ b/packages/common/src/index.ts @@ -24,6 +24,7 @@ export { } from './utils/findAnalyticsMatch'; export { default as removeAndAddNewSpinnerText } from './utils/removeAndAddNewSpinnerText'; export { default as calculateEffectiveExpiryDate } from './utils/calculateEffectiveExpiryDate'; +export { default as generateErrorLogFile } from './utils/generateErrorLogs'; export { default as sanitizeCsvRecord } from './utils/sanitizeCsvRecord'; export { parseUrl } from './utils/parseUrl'; export { default as fetchLocalData } from './utils/fetchLocalData'; diff --git a/packages/common/src/utils/generateErrorLogs.ts b/packages/common/src/utils/generateErrorLogs.ts new file mode 100644 index 000000000..e14611d2a --- /dev/null +++ b/packages/common/src/utils/generateErrorLogs.ts @@ -0,0 +1,50 @@ +/* + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * Internal dependencies + */ +import { CompleteJson } from '../cookies.types'; + +/** + * This function generates error logs file for a sitemap analysis. + * @param JSONReport The JSON report for which error logs file has to be generated + * @returns string of error logs. 
+ */ +export const generateErrorLogFile = (JSONReport: CompleteJson[]) => { + let erroredOutTextFileData = ''; + + JSONReport.forEach(({ erroredOutUrls }) => { + if (!erroredOutUrls) { + return; + } + + erroredOutUrls.forEach((error) => { + const temporaryFormedData = ` + URL: ${error.url} + Error Code: ${error.errorCode ?? 'N/A'} + Error Message: ${error.errorMessage} + ErrorStack: + ${error.stackTrace ?? 'N/A'} + `; + + erroredOutTextFileData += temporaryFormedData + '\n'; + }); + }); + + return erroredOutTextFileData; +}; + +export default generateErrorLogFile; diff --git a/packages/common/src/utils/removeAndAddNewSpinnerText.ts b/packages/common/src/utils/removeAndAddNewSpinnerText.ts index f02757a56..1152f3635 100644 --- a/packages/common/src/utils/removeAndAddNewSpinnerText.ts +++ b/packages/common/src/utils/removeAndAddNewSpinnerText.ts @@ -19,16 +19,18 @@ * @param spinnerName name of the spinner. * @param newSpinnerText The text to be added to the new spinner. * @param indent The indentation for the new text. + * @param failure Determines whether the updated spinner should show the fail status. */ export default function removeAndAddNewSpinnerText( spinnies: any, spinnerName: string, newSpinnerText: string, - indent = 0 + indent = 0, + failure = false ) { spinnies.add(`${spinnerName}-succees`, { text: newSpinnerText, - status: 'succeed', + status: failure ? 
'fail' : 'succeed', indent, }); diff --git a/packages/design-system/src/components/sidebar/useSidebar/constants.ts b/packages/design-system/src/components/sidebar/useSidebar/constants.ts index bb6314272..ce94fa2f8 100644 --- a/packages/design-system/src/components/sidebar/useSidebar/constants.ts +++ b/packages/design-system/src/components/sidebar/useSidebar/constants.ts @@ -17,6 +17,7 @@ export enum SIDEBAR_ITEMS_KEYS { COOKIES = 'cookies', COOKIES_WITH_ISSUES = 'cookie-issues', + URL_WITH_ISSUES = 'urls-with-issues', TECHNOLOGIES = 'technologies', PRIVACY_SANDBOX = 'privacy-sandbox', SITE_BOUNDARIES = 'site-boundaries', diff --git a/packages/design-system/src/components/table/useTable/types.ts b/packages/design-system/src/components/table/useTable/types.ts index 625b06fca..fc2609bb4 100644 --- a/packages/design-system/src/components/table/useTable/types.ts +++ b/packages/design-system/src/components/table/useTable/types.ts @@ -16,9 +16,17 @@ /** * External dependencies. */ -import type { CookieTableData, TechnologyData } from '@google-psat/common'; +import type { + CookieTableData, + TechnologyData, + ErroredOutUrlsData, +} from '@google-psat/common'; -export type TableData = (CookieTableData | TechnologyData) & { +export type TableData = ( + | CookieTableData + | TechnologyData + | ErroredOutUrlsData +) & { highlighted?: boolean; }; diff --git a/packages/report/src/dashboard/components/siteMapReport/index.tsx b/packages/report/src/dashboard/components/siteMapReport/index.tsx index 90861382b..6d7bf9db3 100644 --- a/packages/report/src/dashboard/components/siteMapReport/index.tsx +++ b/packages/report/src/dashboard/components/siteMapReport/index.tsx @@ -22,6 +22,7 @@ import type { CookieFrameStorageType, CompleteJson, LibraryData, + ErroredOutUrlsData, } from '@google-psat/common'; import { SidebarProvider, type SidebarItems } from '@google-psat/design-system'; @@ -37,6 +38,7 @@ interface SiteMapReportProps { completeJson: CompleteJson[] | null; path: string; 
libraryMatches: { [url: string]: LibraryData } | null; + erroredOutUrls: ErroredOutUrlsData[]; } const SiteMapReport = ({ @@ -44,12 +46,14 @@ const SiteMapReport = ({ completeJson, path, libraryMatches, + erroredOutUrls, }: SiteMapReportProps) => { const [data, setData] = useState(sidebarData); return ( >; path: string; libraryMatches: { [url: string]: LibraryData } | null; + erroredOutUrls: ErroredOutUrlsData[]; } const Layout = ({ @@ -57,14 +60,21 @@ const Layout = ({ sidebarData, setSidebarData, path, + erroredOutUrls, libraryMatches, }: LayoutProps) => { const [sites, setSites] = useState([]); useEffect(() => { const _sites = new Set(); - completeJson?.forEach(({ pageUrl }) => { - _sites.add(pageUrl); + completeJson?.forEach(({ pageUrl, erroredOutUrls: _erroredOutURLs }) => { + if ( + !_erroredOutURLs?.some( + ({ url, errorName }) => url === pageUrl && errorName !== 'i' + ) + ) { + _sites.add(pageUrl); + } }); setSites(Array.from(_sites)); @@ -183,9 +193,17 @@ const Layout = ({ }, }; + _data[SIDEBAR_ITEMS_KEYS.URL_WITH_ISSUES].panel = { + Element: ErroredOutUrls, + props: { + erroredOutUrls, + }, + }; + return _data; }); }, [ + erroredOutUrls, clearQuery, completeJson, cookiesWithIssues, diff --git a/packages/report/src/dashboard/components/siteMapReport/sidebarData.ts b/packages/report/src/dashboard/components/siteMapReport/sidebarData.ts index 4173ba334..5e60719d1 100644 --- a/packages/report/src/dashboard/components/siteMapReport/sidebarData.ts +++ b/packages/report/src/dashboard/components/siteMapReport/sidebarData.ts @@ -47,6 +47,24 @@ const sidebarData: SidebarItems = { }, }, }, + [SIDEBAR_ITEMS_KEYS.URL_WITH_ISSUES]: { + title: 'URL Issues', + children: {}, + icon: { + //@ts-ignore + Element: WarningBare, + props: { + className: 'fill-granite-gray', + }, + }, + selectedIcon: { + //@ts-ignore + Element: WarningBare, + props: { + className: 'fill-white', + }, + }, + }, }; export default sidebarData; diff --git 
a/packages/report/src/dashboard/components/siteMapReport/urlsWithIssues.tsx b/packages/report/src/dashboard/components/siteMapReport/urlsWithIssues.tsx new file mode 100644 index 000000000..61613d00f --- /dev/null +++ b/packages/report/src/dashboard/components/siteMapReport/urlsWithIssues.tsx @@ -0,0 +1,34 @@ +/* + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * External dependencies. + */ +import type { ErroredOutUrlsData } from '@google-psat/common'; + +/** + * Internal dependencies. + */ +import ErroredOutUrls from '../urlsWithIssues'; + +interface URLSWithIssuesProps { + erroredOutUrls: ErroredOutUrlsData[]; +} + +const URLSWithIssues = ({ erroredOutUrls }: URLSWithIssuesProps) => { + return ; +}; + +export default URLSWithIssues; diff --git a/packages/report/src/dashboard/components/urlsWithIssues/index.tsx b/packages/report/src/dashboard/components/urlsWithIssues/index.tsx new file mode 100644 index 000000000..70e984abc --- /dev/null +++ b/packages/report/src/dashboard/components/urlsWithIssues/index.tsx @@ -0,0 +1,152 @@ +/* + * Copyright 2023 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * External dependencies + */ +import { useMemo, useState } from 'react'; +import { Resizable } from 're-resizable'; +import { noop, type ErroredOutUrlsData } from '@google-psat/common'; +import { I18n } from '@google-psat/i18n'; +import { + Table, + TableProvider, + type TableColumn, + type InfoType, + type TableRow, + type TableFilter, +} from '@google-psat/design-system'; + +interface ErroredOutUrlsProps { + erroredOutUrls: ErroredOutUrlsData[]; +} + +const ErroredOutUrls = ({ erroredOutUrls }: ErroredOutUrlsProps) => { + const [selectedRow, setSelectedRow] = useState(); + + const tableColumns = useMemo( + () => [ + { + header: 'URL', + accessorKey: 'url', + cell: (info: InfoType) => info, + enableHiding: false, + }, + { + header: 'Error Description', + accessorKey: 'errorMessage', + cell: (info: InfoType) => info, + }, + { + header: 'Error Code', + accessorKey: 'errorCode', + cell: (info: InfoType) => ( + {info} + ), + }, + ], + [] + ); + + const filters = useMemo(() => ({}), []); + + return ( +
+ + { + setSelectedRow(row as ErroredOutUrlsData); + }} + onRowContextMenu={noop} + getRowObjectKey={(row: TableRow) => { + return (row.originalData as ErroredOutUrlsData).url; + }} + > + + + +
+ {selectedRow ? ( +
+ {selectedRow.url && ( + <> +

+ Error Message +

+

+ {selectedRow.errorMessage} +

+ + )} + <> +

+ Error code +

+

+ {selectedRow?.errorCode || I18n.getMessage('noDescription')} +

+ + {selectedRow?.stackTrace && ( + <> +

+ Stack trace +

+

+

+                    
+                  
+

+ + )} +
+ ) : ( +
+

+ {I18n.getMessage('selectRowToPreview')} +

+
+ )} +
+ + ); +}; + +export default ErroredOutUrls; diff --git a/packages/report/src/dashboard/components/utils/extractReportData.ts b/packages/report/src/dashboard/components/utils/extractReportData.ts index 803e71932..e9dab9be0 100644 --- a/packages/report/src/dashboard/components/utils/extractReportData.ts +++ b/packages/report/src/dashboard/components/utils/extractReportData.ts @@ -20,6 +20,7 @@ import type { CompleteJson, CookieFrameStorageType, + ErroredOutUrlsData, LibraryData, TechnologyData, } from '@google-psat/common'; @@ -31,28 +32,47 @@ import extractCookies from './extractCookies'; const extractReportData = (data: CompleteJson[]) => { const landingPageCookies = {}; + const erroredOutUrlsData: ErroredOutUrlsData[] = []; const technologies: TechnologyData[] = []; const consolidatedLibraryMatches: { [url: string]: LibraryData } = {}; - data.forEach(({ cookieData, pageUrl, libraryMatches, technologyData }) => { - formatCookieData( - extractCookies(cookieData, pageUrl, true), - landingPageCookies - ); + data.forEach( + ({ + cookieData, + pageUrl, + libraryMatches, + technologyData, + erroredOutUrls, + }) => { + erroredOutUrlsData.push(...(erroredOutUrls ?? 
[])); - technologies.push( - ...technologyData.map((technology) => ({ - ...technology, - pageUrl, - })) - ); + if ( + erroredOutUrls && + erroredOutUrls.filter(({ url }) => url === pageUrl).length > 0 + ) { + return; + } - consolidatedLibraryMatches[pageUrl] = libraryMatches; - }); + formatCookieData( + extractCookies(cookieData, pageUrl, true), + landingPageCookies + ); + + technologies.push( + ...technologyData.map((technology) => ({ + ...technology, + pageUrl, + })) + ); + + consolidatedLibraryMatches[pageUrl] = libraryMatches; + } + ); return { landingPageCookies, consolidatedLibraryMatches, + erroredOutUrlsData, }; }; diff --git a/packages/report/src/dashboard/components/utils/reportDownloader/generateSiteMapReportandDownload.ts b/packages/report/src/dashboard/components/utils/reportDownloader/generateSiteMapReportandDownload.ts index e6f9b7d1f..b13d4d797 100644 --- a/packages/report/src/dashboard/components/utils/reportDownloader/generateSiteMapReportandDownload.ts +++ b/packages/report/src/dashboard/components/utils/reportDownloader/generateSiteMapReportandDownload.ts @@ -19,7 +19,11 @@ */ import JSZip from 'jszip'; import { saveAs } from 'file-saver'; -import { getCurrentDateAndTime, type CompleteJson } from '@google-psat/common'; +import { + getCurrentDateAndTime, + type CompleteJson, + generateErrorLogFile, +} from '@google-psat/common'; import { type TableFilter } from '@google-psat/design-system'; /** * Internal dependencies @@ -55,6 +59,10 @@ const generateSiteMapReportandDownload = async ( zip.file('report.html', report); + const errorLogs = generateErrorLogFile(JSONReport); + + zip.file('error_logs.txt', errorLogs); + const content = await zip.generateAsync({ type: 'blob' }); saveAs( content, diff --git a/packages/report/src/dashboard/components/utils/tests/data.mock.ts b/packages/report/src/dashboard/components/utils/tests/data.mock.ts index 39e525d5a..22132c724 100644 --- a/packages/report/src/dashboard/components/utils/tests/data.mock.ts +++ 
b/packages/report/src/dashboard/components/utils/tests/data.mock.ts @@ -22,6 +22,7 @@ import { type CompleteJson } from '@google-psat/common'; export const tempSinglePageData: CompleteJson = { pageUrl: 'https://edition.cnn.com/sitemaps/sitemap-section.xml', libraryMatches: {}, + erroredOutUrls: [], cookieData: { 'https://edition.cnn.com': { frameCookies: { @@ -75,6 +76,7 @@ export const tempMultiPageData: CompleteJson[] = [ { libraryMatches: {}, pageUrl: 'https://www.cnn.com/index.html', + erroredOutUrls: [], technologyData: [ { slug: 'varnish', @@ -127,6 +129,7 @@ export const tempMultiPageData: CompleteJson[] = [ { libraryMatches: {}, pageUrl: 'https://edition.cnn.com/index.html', + erroredOutUrls: [], technologyData: [ { slug: 'varnish', diff --git a/packages/report/src/dashboard/components/utils/tests/extractReportData.ts b/packages/report/src/dashboard/components/utils/tests/extractReportData.ts index 93b498d74..4139d91b5 100644 --- a/packages/report/src/dashboard/components/utils/tests/extractReportData.ts +++ b/packages/report/src/dashboard/components/utils/tests/extractReportData.ts @@ -24,6 +24,7 @@ describe('extractReportData', () => { it('should return an empty object if no cookies are present', () => { expect(extractReportData([])).toEqual({ landingPageCookies: {}, + erroredOutUrlsData: [], consolidatedLibraryMatches: {}, }); }); @@ -58,6 +59,7 @@ describe('extractReportData', () => { }, }, }, + erroredOutUrlsData: [], consolidatedLibraryMatches: { 'https://edition.cnn.com/index.html': {}, 'https://www.cnn.com/index.html': {},