Skip to content

feat: make storage purging default, add --resurrect #729

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jun 12, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/check.yaml
Original file line number Diff line number Diff line change
@@ -18,7 +18,7 @@ jobs:
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, windows-latest]
os: [ubuntu-latest, windows-2025]
node-version: [18, 20, 22, 24]

steps:
@@ -60,7 +60,7 @@ jobs:
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, windows-latest]
os: [ubuntu-latest, windows-2025]
python-version: ["3.9", "3.10", "3.11", "3.12"]
runs-on: ${{ matrix.os }}

4 changes: 2 additions & 2 deletions .github/workflows/cucumber.yaml
Original file line number Diff line number Diff line change
@@ -16,9 +16,9 @@ jobs:
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, windows-latest]
os: [ubuntu-latest, windows-2025]
# We only test LTS for now
node-version: [20]
node-version: [22]

runs-on: ${{ matrix.os }}

1 change: 1 addition & 0 deletions src/commands/_register.ts
Original file line number Diff line number Diff line change
@@ -66,4 +66,5 @@ export const actorCommands = [
ActorGetInputCommand,
ActorChargeCommand,
HelpCommand,
UpgradeCommand,
] as const satisfies (typeof BuiltApifyCommand)[];
51 changes: 15 additions & 36 deletions src/commands/run.ts
Original file line number Diff line number Diff line change
@@ -60,29 +60,25 @@ export class RunCommand extends ApifyCommand<typeof RunCommand> {
purge: Flags.boolean({
char: 'p',
description:
'Shortcut that combines the --purge-queue, --purge-dataset and --purge-key-value-store options.',
'Whether to purge the default request queue, dataset and key-value store before the run starts.\nFor crawlee projects, this is the default behavior, and the flag is optional.\nUse `--no-purge` to keep the storage folder intact.',
required: false,
default: true,
exclusive: ['resurrect'],
}),
'purge-queue': Flags.boolean({
description: 'Deletes the local directory containing the default request queue before the run starts.',
required: false,
}),
'purge-dataset': Flags.boolean({
description: 'Deletes the local directory containing the default dataset before the run starts.',
required: false,
}),
'purge-key-value-store': Flags.boolean({
resurrect: Flags.boolean({
description:
'Deletes all records from the default key-value store in the local directory before the run starts, except for the "INPUT" key.',
'Whether to keep the default request queue, dataset and key-value store before the run starts.',
required: false,
default: false,
exclusive: ['purge'],
}),
entrypoint: Flags.string({
description: [
'Optional entrypoint for running with injected environment variables.',
'\n',
'For Python, it is the module name, or a path to a file.',
'\n',
'For node.js, it is the npm script name, or a path to a JS/MJS file.',
'For Node.js, it is the npm script name, or a path to a JS/MJS file.',
'You can also pass in a directory name, provided that directory contains an "index.js" file.',
].join(' '),
required: false,
@@ -215,42 +211,25 @@ export class RunCommand extends ApifyCommand<typeof RunCommand> {

let CRAWLEE_PURGE_ON_START = '0';

// Mark resurrect as a special case of --no-purge
if (this.flags.resurrect) {
this.flags.purge = false;
}

// Purge stores
// TODO: this needs to be cleaned up heavily - ideally logic should be in the project analyzers
if (this.flags.purge) {
CRAWLEE_PURGE_ON_START = '1';

if (crawleeVersion.isNone()) {
await Promise.all([purgeDefaultQueue(), purgeDefaultKeyValueStore(), purgeDefaultDataset()]);
info({ message: 'All default local stores were purged.' });
}

// This might not be needed for Python and Scrapy projects
// if (type === ProjectLanguage.Python || type === ProjectLanguage.Scrapy) {
// await Promise.all([purgeDefaultQueue(), purgeDefaultKeyValueStore(), purgeDefaultDataset()]);
// info({ message: 'All default local stores were purged.' });
// }
}

// TODO: deprecate these flags
if (this.flags.purgeQueue && !this.flags.purge) {
await purgeDefaultQueue();
info({ message: 'Default local request queue was purged.' });
}

if (this.flags.purgeDataset && !this.flags.purge) {
await purgeDefaultDataset();
info({ message: 'Default local dataset was purged.' });
}

if (this.flags.purgeKeyValueStore && !this.flags.purge) {
await purgeDefaultKeyValueStore();
info({ message: 'Default local key-value store was purged.' });
}

if (!this.flags.purge) {
const isStorageEmpty = await checkIfStorageIsEmpty();
if (!isStorageEmpty) {

if (!isStorageEmpty && !this.flags.resurrect) {
warning({
message:
'The storage directory contains a previous state, the Actor will continue where it left off. ' +
4 changes: 3 additions & 1 deletion src/lib/command-framework/apify-command.ts
Original file line number Diff line number Diff line change
@@ -312,6 +312,8 @@ export abstract class ApifyCommand<T extends typeof BuiltApifyCommand = typeof B
});

return;
} else if (typeof builderData.hasDefault !== 'undefined') {
this.flags[camelCasedName] = builderData.hasDefault;
}
}
}
@@ -452,7 +454,7 @@ export abstract class ApifyCommand<T extends typeof BuiltApifyCommand = typeof B

// yargs treats a "no-" prefix as the negation of the base flag, so a flag declared with a "no-" prefix is registered under its base name (prefix stripped)
if (flagKey.startsWith('no-')) {
finalYargs = internalBuilderData.builder(finalYargs, flagKey.slice(3), [], true);
finalYargs = internalBuilderData.builder(finalYargs, flagKey.slice(3));
} else {
finalYargs = internalBuilderData.builder(finalYargs, flagKey);
}
11 changes: 4 additions & 7 deletions src/lib/command-framework/flags.ts
Original file line number Diff line number Diff line change
@@ -42,7 +42,7 @@ export interface TaggedFlagBuilder<
HasDefault = false,
> {
flagTag: Tag;
builder: (args: Argv, objectName: string, extraArgs?: string[], invertDefaultIfSet?: boolean) => Argv;
builder: (args: Argv, objectName: string, extraArgs?: string[]) => Argv;
choicesType: ChoicesType;
required: Required;
hasDefault: HasDefault;
@@ -67,7 +67,7 @@ function stringFlag<const Choices extends string[], const T extends StringFlagOp
): TaggedFlagBuilder<'string', Choices, T['default'] extends string ? true : T['required'], T['default']> {
return {
flagTag: 'string',
builder: (args, objectName, extraAliases, invertDefaultIfSet = false) => {
builder: (args, objectName, extraAliases) => {
const allAliases = new Set([...(options.aliases ?? []), ...(extraAliases ?? [])]);

if (options.char) {
@@ -82,7 +82,6 @@ function stringFlag<const Choices extends string[], const T extends StringFlagOp
alias: [...allAliases].map((alias) => kebabCaseString(camelCaseToKebabCase(alias))),
hidden: options.hidden ?? false,
conflicts: options.exclusive,
default: invertDefaultIfSet ? !options.default : options.default,
choices: options.choices,
string: true,
// we only require something be passed in if we don't have a default or read from stdin
@@ -107,7 +106,7 @@ function booleanFlag<const T extends BooleanFlagOptions>(
): TaggedFlagBuilder<'boolean', never, T['default'] extends boolean ? true : T['required'], T['default']> {
return {
flagTag: 'boolean',
builder: (args, objectName, extraAliases, invertDefaultIfSet = false) => {
builder: (args, objectName, extraAliases) => {
const allAliases = new Set([...(options.aliases ?? []), ...(extraAliases ?? [])]);

if (options.char) {
@@ -122,7 +121,6 @@ function booleanFlag<const T extends BooleanFlagOptions>(
alias: [...allAliases].map((alias) => kebabCaseString(camelCaseToKebabCase(alias))),
hidden: options.hidden ?? false,
conflicts: options.exclusive,
default: invertDefaultIfSet ? !options.default : options.default,
boolean: true,
});
},
@@ -144,7 +142,7 @@ function integerFlag<const T extends IntegerFlagOptions>(
): TaggedFlagBuilder<'integer', never, T['default'] extends number ? true : T['required'], T['default']> {
return {
flagTag: 'integer',
builder: (args, objectName, extraAliases, invertDefaultIfSet = false) => {
builder: (args, objectName, extraAliases) => {
const allAliases = new Set([...(options.aliases ?? []), ...(extraAliases ?? [])]);

if (options.char) {
@@ -159,7 +157,6 @@ function integerFlag<const T extends IntegerFlagOptions>(
alias: [...allAliases].map((alias) => kebabCaseString(camelCaseToKebabCase(alias))),
hidden: options.hidden ?? false,
conflicts: options.exclusive,
default: invertDefaultIfSet ? !options.default : options.default,
choices: options.choices,
string: true,
nargs: 1,
6 changes: 4 additions & 2 deletions vitest.config.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
// eslint-disable-next-line import/extensions
import { defineConfig } from 'vitest/config';

const isWindows = process.platform === 'win32';
const multiplierFactor = isWindows ? 4 : 1;

export default defineConfig({
esbuild: {
@@ -10,8 +12,8 @@ export default defineConfig({
test: {
globals: true,
restoreMocks: true,
testTimeout: 60_000 * (isWindows ? 2 : 1),
hookTimeout: 60_000 * (isWindows ? 2 : 1),
testTimeout: 60_000 * multiplierFactor,
hookTimeout: 60_000 * multiplierFactor,
include: ['**/*.{test,spec}.?(c|m)[jt]s?(x)'],
passWithNoTests: true,
silent: !process.env.NO_SILENT_TESTS,