From 0bc0e5a44901b2a8d5a5c657d821c0823eec238d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Glawaty?=
Date: Wed, 18 Oct 2023 06:04:15 +0200
Subject: [PATCH] Fixed field "finishedAt" in the result

- fixed the value of the field "finishedAt" in the result returned from Crawler
- added error logging in the Crawler's `errorHandler` handler
---
 CHANGELOG.md            |  5 +++++
 src/crawler/crawler.mjs | 16 ++++++++--------
 2 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2a6cf95..098f603 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ## Unreleased
+### Added
+- Added error logging in the Crawler's `errorHandler` handler.
+
+### Fixed
+- Fixed the value of the field "finishedAt" in the result returned from Crawler.
 
 ## v0.5.1 - 2023-09-20
 ### Fixed
diff --git a/src/crawler/crawler.mjs b/src/crawler/crawler.mjs
index abe5298..f3faf70 100644
--- a/src/crawler/crawler.mjs
+++ b/src/crawler/crawler.mjs
@@ -82,7 +82,7 @@ export class Crawler {
             this.#cleanup(userDataDir, scenarioId);
         }
 
-        const result = await this.#scenarioRepository.get(scenarioId);
+        let result = await this.#scenarioRepository.get(scenarioId);
         let isAnyUrlSuccessfullyVisited = false;
 
         for (let visitedUrl of result.results.visitedUrls) {
@@ -96,18 +96,13 @@ export class Crawler {
         if (isAnyUrlSuccessfullyVisited) {
             await this.#scenarioRepository.markAdCompleted(scenarioId);
             await logger.info(`Scenario ${scenarioId} completed`);
-
-            result.status = 'completed';
         } else {
             const errorMessage = 'No url has been successfully crawled.';
             await this.#scenarioRepository.markAsFailed(scenarioId, errorMessage);
             await logger.error(new Error(`Scenario ${scenarioId} failed, reason: ${errorMessage}`));
-
-            result.status = 'failed';
-            result.error = errorMessage;
         }
 
-        return result;
+        return await this.#scenarioRepository.get(scenarioId);
     }
 
     async #doCrawl(scenarioId, config, logger, updateProgressHandler, userDataDir) {
@@ -304,10 +299,15 @@ export class Crawler {
                 await updateProgress(crawler);
             },
 
-            async errorHandler({ crawler, request }) {
+            async errorHandler({ crawler, request }, err) {
                 if ((await checkAbortion(crawler))) {
                     request.noRetry = true;
                 }
+
+                const scene = request.userData.scene || '?';
+                const currentUrl = request.url;
+
+                await logger.warning(`Failed to crawl URL ${currentUrl} (scene "${scene}"). The request has been reclaimed back to the queue. ${err.toString()}`);
             },
         }, configuration);
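
Why re-reading the scenario fixes "finishedAt": markAsCompleted()/markAsFailed() persist the final status and completion timestamp, so a record fetched before those calls is a stale snapshot whose finishedAt is still empty, while fetching it again afterwards returns the stored value. A minimal sketch of that behaviour, using an assumed in-memory stub rather than the project's real scenario repository:

  // sketch.mjs - illustrative stub only, not the project's ScenarioRepository API
  const scenarios = new Map([['abc', { status: 'running', finishedAt: null }]]);

  const scenarioRepository = {
      // returns a copy (snapshot) of the stored record
      async get(id) {
          return { ...scenarios.get(id) };
      },
      // persists the final status, error and completion timestamp
      async markAsFailed(id, error) {
          Object.assign(scenarios.get(id), { status: 'failed', error, finishedAt: new Date() });
      },
  };

  const before = await scenarioRepository.get('abc');   // fetched before the status update
  await scenarioRepository.markAsFailed('abc', 'No url has been successfully crawled.');
  const after = await scenarioRepository.get('abc');    // fetched after the status update

  console.log(before.finishedAt);  // null - the stale value the old code returned
  console.log(after.finishedAt);   // Date - the value the patched code returns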