Skip to content

Commit

Permalink
fix: various helpers opening KVS now respect Configuration (#2071)
Browse files — browse the repository at this point in the history
  • Loading branch information
barjin authored Sep 11, 2023
1 parent f9fb5c4 commit 59dbb16
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 8 deletions.
8 changes: 7 additions & 1 deletion packages/core/src/crawlers/statistics.ts
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,11 @@ export class Statistics {
*/
readonly requestRetryHistogram: number[] = [];

/**
* Contains the associated Configuration instance
*/
private readonly config: Configuration;

private keyValueStore?: KeyValueStore = undefined;
private persistStateKey = `SDK_CRAWLER_STATISTICS_${this.id}`;
private logIntervalMillis: number;
Expand Down Expand Up @@ -111,6 +116,7 @@ export class Statistics {
this.keyValueStore = keyValueStore;
this.listener = this.persistState.bind(this);
this.events = config.getEventManager();
this.config = config;

// initialize by "resetting"
this.reset();
Expand Down Expand Up @@ -239,7 +245,7 @@ export class Statistics {
* displaying the current state in predefined intervals
*/
async startCapturing() {
this.keyValueStore ??= await KeyValueStore.open();
this.keyValueStore ??= await KeyValueStore.open(null, { config: this.config });

await this._maybeLoadStatistics();

Expand Down
2 changes: 1 addition & 1 deletion packages/core/src/storages/dataset.ts
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,7 @@ export class Dataset<Data extends Dictionary = Dictionary> {
* @param [contentType] Only JSON and CSV are supported currently, defaults to JSON.
*/
async exportTo(key: string, options?: ExportOptions, contentType?: string): Promise<void> {
const kvStore = await KeyValueStore.open(options?.toKVS ?? null);
const kvStore = await KeyValueStore.open(options?.toKVS ?? null, { config: this.config });
const items: Data[] = [];

const fetchNextChunk = async (offset = 0): Promise<void> => {
Expand Down
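14 changes: 11 additions & 3 deletions packages/playwright-crawler/src/internals/utils/playwright-utils.ts
Original file line number Diff line number Diff line change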
Expand Up @@ -24,7 +24,7 @@ import vm from 'vm';
import { LruCache } from '@apify/datastructures';
import log_ from '@apify/log';
import type { Request } from '@crawlee/browser';
import { validators, KeyValueStore, RequestState } from '@crawlee/browser';
import { validators, KeyValueStore, RequestState, Configuration } from '@crawlee/browser';
import type { BatchAddRequestsResult } from '@crawlee/types';
import type { CheerioRoot, Dictionary } from '@crawlee/utils';
import * as cheerio from 'cheerio';
Expand Down Expand Up @@ -498,6 +498,12 @@ export interface SaveSnapshotOptions {
* @default null
*/
keyValueStoreName?: string | null;

/**
* Configuration of the crawler that will be used to save the snapshot.
* @default Configuration.getGlobalConfig()
*/
config?: Configuration;
}

/**
Expand All @@ -513,6 +519,7 @@ export async function saveSnapshot(page: Page, options: SaveSnapshotOptions = {}
saveScreenshot: ow.optional.boolean,
saveHtml: ow.optional.boolean,
keyValueStoreName: ow.optional.string,
config: ow.optional.object,
}));

const {
Expand All @@ -521,10 +528,11 @@ export async function saveSnapshot(page: Page, options: SaveSnapshotOptions = {}
saveScreenshot = true,
saveHtml = true,
keyValueStoreName,
config,
} = options;

try {
const store = await KeyValueStore.open(keyValueStoreName);
const store = await KeyValueStore.open(keyValueStoreName, { config: config ?? Configuration.getGlobalConfig() });

if (saveScreenshot) {
const screenshotName = `${key}.jpg`;
Expand Down Expand Up @@ -756,7 +764,7 @@ export function registerUtilsToContext(context: PlaywrightCrawlingContext): void
context.blockRequests = (options?: BlockRequestsOptions) => blockRequests(context.page, options);
context.parseWithCheerio = () => parseWithCheerio(context.page);
context.infiniteScroll = (options?: InfiniteScrollOptions) => infiniteScroll(context.page, options);
context.saveSnapshot = (options?: SaveSnapshotOptions) => saveSnapshot(context.page, options);
context.saveSnapshot = (options?: SaveSnapshotOptions) => saveSnapshot(context.page, { ...options, config: context.crawler.config });
context.enqueueLinksByClickingElements = (options: Omit<EnqueueLinksByClickingElementsOptions, 'page' | 'requestQueue'>) => enqueueLinksByClickingElements({
...options,
page: context.page,
Expand Down
14 changes: 11 additions & 3 deletions packages/puppeteer-crawler/src/internals/utils/puppeteer_utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ import vm from 'vm';
import { LruCache } from '@apify/datastructures';
import log_ from '@apify/log';
import type { Request } from '@crawlee/browser';
import { KeyValueStore, RequestState, validators } from '@crawlee/browser';
import { KeyValueStore, RequestState, validators, Configuration } from '@crawlee/browser';
import type { Dictionary, BatchAddRequestsResult } from '@crawlee/types';
import type { CheerioRoot } from '@crawlee/utils';
import * as cheerio from 'cheerio';
Expand Down Expand Up @@ -630,6 +630,12 @@ export interface SaveSnapshotOptions {
* @default null
*/
keyValueStoreName?: string | null;

/**
* Configuration of the crawler that will be used to save the snapshot.
* @default Configuration.getGlobalConfig()
*/
config?: Configuration;
}

/**
Expand All @@ -645,6 +651,7 @@ export async function saveSnapshot(page: Page, options: SaveSnapshotOptions = {}
saveScreenshot: ow.optional.boolean,
saveHtml: ow.optional.boolean,
keyValueStoreName: ow.optional.string,
config: ow.optional.object,
}));

const {
Expand All @@ -653,10 +660,11 @@ export async function saveSnapshot(page: Page, options: SaveSnapshotOptions = {}
saveScreenshot = true,
saveHtml = true,
keyValueStoreName,
config,
} = options;

try {
const store = await KeyValueStore.open(keyValueStoreName);
const store = await KeyValueStore.open(keyValueStoreName, { config: config ?? Configuration.getGlobalConfig() });

if (saveScreenshot) {
const screenshotName = `${key}.jpg`;
Expand Down Expand Up @@ -962,7 +970,7 @@ export function registerUtilsToContext(context: PuppeteerCrawlingContext): void
context.addInterceptRequestHandler = (handler: InterceptHandler) => addInterceptRequestHandler(context.page, handler);
context.removeInterceptRequestHandler = (handler: InterceptHandler) => removeInterceptRequestHandler(context.page, handler);
context.infiniteScroll = (options?: InfiniteScrollOptions) => infiniteScroll(context.page, options);
context.saveSnapshot = (options?: SaveSnapshotOptions) => saveSnapshot(context.page, options);
context.saveSnapshot = (options?: SaveSnapshotOptions) => saveSnapshot(context.page, { ...options, config: context.crawler.config });
context.closeCookieModals = () => closeCookieModals(context.page);
}

Expand Down

0 comments on commit 59dbb16

Please sign in to comment.