Skip to content

Commit

Permalink
Fixed field "finishedAt" in the result
Browse files Browse the repository at this point in the history
- fixed the value of the field "finishedAt" in the result returned from Crawler
- added error logging to the Crawler's `errorHandler`
  • Loading branch information
tg666 committed Oct 18, 2023
1 parent 3f40fab commit 0bc0e5a
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 8 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## Unreleased
### Added
- Added error logging to the Crawler's `errorHandler`.

### Fixed
- Fixed the value of the field "finishedAt" in the result returned from Crawler.

## v0.5.1 - 2023-09-20
### Fixed
Expand Down
16 changes: 8 additions & 8 deletions src/crawler/crawler.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ export class Crawler {
this.#cleanup(userDataDir, scenarioId);
}

const result = await this.#scenarioRepository.get(scenarioId);
let result = await this.#scenarioRepository.get(scenarioId);
let isAnyUrlSuccessfullyVisited = false;

for (let visitedUrl of result.results.visitedUrls) {
Expand All @@ -96,18 +96,13 @@ export class Crawler {
if (isAnyUrlSuccessfullyVisited) {
await this.#scenarioRepository.markAdCompleted(scenarioId);
await logger.info(`Scenario ${scenarioId} completed`);

result.status = 'completed';
} else {
const errorMessage = 'No url has been successfully crawled.';
await this.#scenarioRepository.markAsFailed(scenarioId, errorMessage);
await logger.error(new Error(`Scenario ${scenarioId} failed, reason: ${errorMessage}`));

result.status = 'failed';
result.error = errorMessage;
}

return result;
return await this.#scenarioRepository.get(scenarioId);
}

async #doCrawl(scenarioId, config, logger, updateProgressHandler, userDataDir) {
Expand Down Expand Up @@ -304,10 +299,15 @@ export class Crawler {
await updateProgress(crawler);
},

async errorHandler({ crawler, request }) {
async errorHandler({ crawler, request }, err) {
if ((await checkAbortion(crawler))) {
request.noRetry = true;
}

const scene = request.userData.scene || '?';
const currentUrl = request.url;

await logger.warning(`Failed to crawl URL ${currentUrl} (scene "${scene}"). The request has been reclaimed back to the queue. ${err.toString()}`);
},
}, configuration);

Expand Down

0 comments on commit 0bc0e5a

Please sign in to comment.