Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(browser-pool): improve error handling when browser is not found #2050

Merged
merged 7 commits into from
Aug 30, 2023
4 changes: 3 additions & 1 deletion packages/basic-crawler/src/internals/basic-crawler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1123,7 +1123,9 @@ export class BasicCrawler<Context extends CrawlingContext = BasicCrawlingContext
);
request.state = RequestState.DONE;
} catch (secondaryError: any) {
if (!secondaryError.triggeredFromUserHandler) {
if (!secondaryError.triggeredFromUserHandler
// avoid reprinting the same critical error multiple times, as it will be printed by Node.js at the end anyway
&& !(secondaryError instanceof CriticalError)) {
const apifySpecific = process.env.APIFY_IS_AT_HOME
? `This may have happened due to an internal error of Apify's API or due to a misconfigured crawler.` : '';
this.log.exception(secondaryError as Error, 'An exception occurred during handling of failed request. '
Expand Down
1 change: 1 addition & 0 deletions packages/browser-pool/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
"dependencies": {
"@apify/log": "^2.4.0",
"@apify/timeout": "^0.3.0",
"@crawlee/core": "3.5.2",
"@crawlee/types": "3.5.2",
"fingerprint-generator": "^2.0.6",
"fingerprint-injector": "^2.0.5",
Expand Down
25 changes: 23 additions & 2 deletions packages/browser-pool/src/playwright/playwright-plugin.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import net from 'net';
import os from 'os';
import path from 'path';

import { CriticalError } from '@crawlee/core';
import type { Browser as PlaywrightBrowser, BrowserType } from 'playwright';

import { loadFirefoxAddon } from './load-firefox-addon';
Expand Down Expand Up @@ -63,7 +64,9 @@ export class PlaywrightPlugin extends BrowserPlugin<BrowserType, SafeParameters<

try {
if (useIncognitoPages) {
browser = await this.library.launch(launchOptions);
browser = await this.library.launch(launchOptions).catch((error) => {
return this._throwOnFailedLaunch(launchContext, error);
});

if (anonymizedProxyUrl) {
browser.on('disconnected', async () => {
Expand Down Expand Up @@ -108,7 +111,9 @@ export class PlaywrightPlugin extends BrowserPlugin<BrowserType, SafeParameters<
}
}

const browserContext = await this.library.launchPersistentContext(userDataDir, launchOptions);
const browserContext = await this.library.launchPersistentContext(userDataDir, launchOptions).catch((error) => {
return this._throwOnFailedLaunch(launchContext, error);
});

browserContext.once('close', () => {
if (userDataDir.includes('apify-playwright-firefox-taac-')) {
Expand Down Expand Up @@ -174,6 +179,22 @@ export class PlaywrightPlugin extends BrowserPlugin<BrowserType, SafeParameters<
return browser;
}

/**
 * Turns a failed browser launch into a `CriticalError` carrying troubleshooting hints.
 *
 * The message mentions the configured `executablePath` (when one is set) and then suggests
 * either the Apify base Docker image (when `APIFY_IS_AT_HOME` is set) or installing the
 * Playwright browsers and their dependencies.
 *
 * @param launchContext Launch context; only `launchOptions.executablePath` is read.
 * @param cause The original launch failure, attached to the thrown error as its `cause`.
 * @throws {CriticalError} Always — this method never returns.
 */
private _throwOnFailedLaunch(launchContext: LaunchContext<BrowserType>, cause: unknown): never {
    const executablePath = launchContext.launchOptions?.executablePath;

    let debugMessage = 'Failed to launch browser.';
    if (executablePath) {
        debugMessage += ` Check whether the provided executable path is correct: ${executablePath}.`;
    }

    if (process.env.APIFY_IS_AT_HOME) {
        // The image name is wrapped in a balanced pair of backticks so it renders as inline code.
        debugMessage += ' Make sure your Dockerfile extends `apify/actor-node-playwright-*` (with a correct browser name). Or install';
    } else {
        debugMessage += ' Try installing';
    }

    debugMessage += ' the required dependencies by running `npx playwright install --with-deps` (https://playwright.dev/docs/browsers).'
        + ' The original error will be displayed at the bottom as the [cause].';

    throw new CriticalError(debugMessage, { cause });
}

/**
 * Creates a new `PlaywrightController` for this plugin instance.
 *
 * @returns A controller constructed with this plugin passed as its argument.
 */
protected _createController(): BrowserController<BrowserType, SafeParameters<BrowserType['launch']>[0], PlaywrightBrowser> {
    const controller = new PlaywrightController(this);
    return controller;
}
Expand Down
19 changes: 16 additions & 3 deletions packages/browser-pool/src/puppeteer/puppeteer-plugin.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import { CriticalError } from '@crawlee/core';
import type { Dictionary } from '@crawlee/types';
import type Puppeteer from 'puppeteer';
import type * as PuppeteerTypes from 'puppeteer';
Expand Down Expand Up @@ -72,10 +73,22 @@ export class PuppeteerPlugin extends BrowserPlugin<
await close();
});
}
} catch (error) {
} catch (error: any) {
await close();

throw error;
let debugMessage = `Failed to launch browser.`
+ `${launchContext.launchOptions?.executablePath
? ` Check whether the provided executable path is correct: ${launchContext.launchOptions?.executablePath}.` : ''}`;
if (process.env.APIFY_IS_AT_HOME) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It feels weird that for Playwright we handle this outside of the Apify platform too, but here we don't. FYI, Puppeteer now has a similar CLI for installing browsers: https://pptr.dev/browsers-api/

Not sure if it's already mentioned in the error produced by Puppeteer?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not really. But the bad thing about this command is that it installs the executable into the folder where it was executed, under a long directory hierarchy like chromium/mac_arm/-xxxx.../../.../MacOS/Chromium, and you have to be aware of that executable location in order to, e.g., pass it into PUPPETEER_EXECUTABLE_PATH.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It does mention the troubleshooting guide most of the time, though.

debugMessage += ' Make sure your Dockerfile extends `apify/actor-node-puppeteer-chrome. Or install';
} else {
debugMessage += ' Try installing';
}
debugMessage += ` a browser, if it's missing, by running \`npx @puppeteer/browsers install chromium --path [path]\``
+ ` and pointing \`executablePath\` to the downloaded executable (https://pptr.dev/browsers-api).`
+ ` The original error will be displayed at the bottom as the [cause].`;
throw new CriticalError(debugMessage, {
cause: error,
});
}
}

Expand Down
8 changes: 7 additions & 1 deletion packages/core/src/autoscaling/autoscaled_pool.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import { Snapshotter } from './snapshotter';
import type { SystemInfo, SystemStatusOptions } from './system_status';
import { SystemStatus } from './system_status';
import { Configuration } from '../configuration';
import { CriticalError } from '../errors';
import { log as defaultLog } from '../log';

export interface AutoscaledPoolOptions {
Expand Down Expand Up @@ -551,7 +552,12 @@ export class AutoscaledPool {
// We might have already rejected this promise.
if (this.reject) {
// No need to log all concurrent errors.
this.log.exception(err, 'runTaskFunction failed.');
if (
// avoid reprinting the same critical error multiple times, as it will be printed by Node.js at the end anyway
!(e instanceof CriticalError)
) {
this.log.exception(err, 'runTaskFunction failed.');
}
this.reject(err);
}
}
Expand Down
1 change: 1 addition & 0 deletions packages/core/src/errors.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ export class NonRetryableError extends Error {}

/**
 * Errors of `CriticalError` type will shut down the whole crawler.
 * Error handlers catching `CriticalError` should avoid logging it, as it will be logged by Node.js itself at the end.
 * It is a subclass of `NonRetryableError`.
 */
export class CriticalError extends NonRetryableError {}

Expand Down
1 change: 1 addition & 0 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -761,6 +761,7 @@ __metadata:
dependencies:
"@apify/log": ^2.4.0
"@apify/timeout": ^0.3.0
"@crawlee/core": 3.5.2
"@crawlee/types": 3.5.2
fingerprint-generator: ^2.0.6
fingerprint-injector: ^2.0.5
Expand Down