From f0f38f34ffcdd536c4deb5375bca113c458fbbb6 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Tue, 3 Oct 2023 06:28:10 -0500 Subject: [PATCH 1/4] Improve `-dump-hashes` output Signed-off-by: Ben Sherman --- .../main/groovy/nextflow/processor/TaskProcessor.groovy | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy b/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy index 8e71969661..c175be1e9e 100644 --- a/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy @@ -2192,14 +2192,9 @@ class TaskProcessor { } private void traceInputsHashes( TaskRun task, List entries, CacheHelper.HashMode mode, hash ) { + def collector = (item) -> "${CacheHelper.hasher(item, mode).hash()} [${item?.getClass()?.getName()}] ${item?.toString()?.replace('\n', '\\n')}" - def buffer = new StringBuilder() - buffer.append("[${safeTaskName(task)}] cache hash: ${hash}; mode: $mode; entries: \n") - for( Object item : entries ) { - buffer.append( " ${CacheHelper.hasher(item, mode).hash()} [${item?.getClass()?.getName()}] $item \n") - } - - log.info(buffer.toString()) + log.info "[${safeTaskName(task)}] cache hash: ${hash}; mode: ${mode}; entries: ${entries.collect(collector).join(' | ')}" } protected Map getTaskGlobalVars(TaskRun task) { From f2aa2eedc786e424f314f8bfa541b7b18e9367e0 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Fri, 6 Oct 2023 10:42:05 -0500 Subject: [PATCH 2/4] Output as JSON, make optional with `-dump-hashes json` Signed-off-by: Ben Sherman --- docs/cache-and-resume.md | 27 +++++++++++++++++++ docs/cli.md | 5 +++- .../src/main/groovy/nextflow/Session.groovy | 4 +-- .../main/groovy/nextflow/cli/CmdRun.groovy | 2 +- .../main/groovy/nextflow/cli/Launcher.groovy | 4 +++ .../nextflow/config/ConfigBuilder.groovy | 10 ++++--- .../nextflow/processor/TaskProcessor.groovy | 24 ++++++++++++++--- 7 files changed, 65 insertions(+), 11 deletions(-) diff --git a/docs/cache-and-resume.md b/docs/cache-and-resume.md index 0fc5ddf4a0..55b890ef43 100644 --- a/docs/cache-and-resume.md +++ b/docs/cache-and-resume.md @@ -186,6 +186,8 @@ nextflow run rnaseq-nf -resume 4dc656d2-c410-44c8-bc32-7dd0ea87bebf You can use the {ref}`cli-log` command to view all previous runs as well as the task executions for each run. +(cache-compare-hashes)= + ### Comparing the hashes of two runs One way to debug a resumed run is to compare the task hashes of each run using the `-dump-hashes` option. @@ -196,3 +198,28 @@ One way to debug a resumed run is to compare the task hashes of each run using t 4. Compare the runs with a diff viewer While some manual effort is required, the final diff can often reveal the exact change that caused a task to be re-executed. + +:::{versionadded} 23.10.0 +::: + +When using `-dump-hashes json`, the task hashes can be more easily extracted into a diff. Here is an example Bash script to perform two runs and produce a diff: + +```bash +nextflow -log run_1.log run $pipeline -dump-hashes json +nextflow -log run_2.log run $pipeline -dump-hashes json -resume + +get_hashes() { + cat $1 \ + | grep 'cache hash:' \ + | cut -d ' ' -f 10- \ + | sort \ + | awk '{ print; print ""; }' +} + +get_hashes run_1.log > run_1.tasks.log +get_hashes run_2.log > run_2.tasks.log + +diff run_1.tasks.log run_2.tasks.log +``` + +You can then view the `diff` output or use a graphical diff viewer to compare `run_1.tasks.log` and `run_2.tasks.log`. diff --git a/docs/cli.md b/docs/cli.md index 310c97b35d..34524316cc 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -1154,7 +1154,10 @@ The `run` command is used to execute a local pipeline script or remote pipeline : Dump channels for debugging purpose. `-dump-hashes` -: Dump task hash keys for debugging purpose. +: Dump task hash keys for debugging purposes. +: :::{versionadded} 23.10.0 + You can use `-dump-hashes json` to dump the task hash keys as JSON for easier post-processing. See the {ref}`caching and resuming tips ` for more details. + ::: `-e.=` : Add the specified variable to execution environment. diff --git a/modules/nextflow/src/main/groovy/nextflow/Session.groovy b/modules/nextflow/src/main/groovy/nextflow/Session.groovy index f8e2cffbd5..8d371b5e6a 100644 --- a/modules/nextflow/src/main/groovy/nextflow/Session.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/Session.groovy @@ -242,11 +242,11 @@ class Session implements ISession { boolean getStatsEnabled() { statsEnabled } - private boolean dumpHashes + private String dumpHashes private List dumpChannels - boolean getDumpHashes() { dumpHashes } + String getDumpHashes() { dumpHashes } List getDumpChannels() { dumpChannels } diff --git a/modules/nextflow/src/main/groovy/nextflow/cli/CmdRun.groovy b/modules/nextflow/src/main/groovy/nextflow/cli/CmdRun.groovy index 67b4fba639..e25304fa67 100644 --- a/modules/nextflow/src/main/groovy/nextflow/cli/CmdRun.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/cli/CmdRun.groovy @@ -220,7 +220,7 @@ class CmdRun extends CmdBase implements HubOptions { String profile @Parameter(names=['-dump-hashes'], description = 'Dump task hash keys for debugging purpose') - boolean dumpHashes + String dumpHashes @Parameter(names=['-dump-channels'], description = 'Dump channels for debugging purpose') String dumpChannels diff --git a/modules/nextflow/src/main/groovy/nextflow/cli/Launcher.groovy b/modules/nextflow/src/main/groovy/nextflow/cli/Launcher.groovy index a2deedbb07..0eb5bb1052 100644 --- a/modules/nextflow/src/main/groovy/nextflow/cli/Launcher.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/cli/Launcher.groovy @@ -224,6 +224,10 @@ class Launcher { normalized << '%all' } + else if( current == '-dump-hashes' && (i==args.size() || args[i].startsWith('-'))) { + normalized << '-' + } + else if( current == '-with-trace' && (i==args.size() || args[i].startsWith('-'))) { normalized << '-' } diff --git a/modules/nextflow/src/main/groovy/nextflow/config/ConfigBuilder.groovy b/modules/nextflow/src/main/groovy/nextflow/config/ConfigBuilder.groovy index f19a785619..62329bcc9c 100644 --- a/modules/nextflow/src/main/groovy/nextflow/config/ConfigBuilder.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/config/ConfigBuilder.groovy @@ -595,9 +595,13 @@ class ConfigBuilder { if( config.isSet('resume') ) config.resume = normalizeResumeId(config.resume as String) - // -- sets `dumpKeys` option - if( cmdRun.dumpHashes ) - config.dumpHashes = cmdRun.dumpHashes + // -- sets `dumpHashes` option + if( cmdRun.dumpHashes ) { + if( cmdRun.dumpHashes != '-' ) + config.dumpHashes = cmdRun.dumpHashes + else + config.dumpHashes = 'default' + } if( cmdRun.dumpChannels ) config.dumpChannels = cmdRun.dumpChannels.tokenize(',') diff --git a/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy b/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy index 68e3e2edd4..8bc47fc7f8 100644 --- a/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy @@ -32,6 +32,7 @@ import java.util.regex.Pattern import ch.artecat.grengine.Grengine import com.google.common.hash.HashCode +import groovy.json.JsonOutput import groovy.transform.CompileStatic import groovy.transform.Memoized import groovy.transform.PackageScope @@ -2154,9 +2155,10 @@ class TaskProcessor { final mode = config.getHashMode() final hash = computeHash(keys, mode) - if( session.dumpHashes ) { + if( session.dumpHashes == 'json' ) + traceInputsHashesJson(task, keys, mode, hash) + else if( session.dumpHashes ) traceInputsHashes(task, keys, mode, hash) - } return hash } @@ -2191,10 +2193,24 @@ class TaskProcessor { return result } + private void traceInputsHashesJson( TaskRun task, List entries, CacheHelper.HashMode mode, hash ) { + def collector = (item) -> [ + hash: CacheHelper.hasher(item, mode).hash().toString(), + type: item?.getClass()?.getName(), + value: item?.toString() + ] + log.info "[${safeTaskName(task)}] cache hash: ${hash}; mode: ${mode}; entries: ${JsonOutput.toJson(entries.collect(collector))}" + } + private void traceInputsHashes( TaskRun task, List entries, CacheHelper.HashMode mode, hash ) { - def collector = (item) -> "${CacheHelper.hasher(item, mode).hash()} [${item?.getClass()?.getName()}] ${item?.toString()?.replace('\n', '\\n')}" - log.info "[${safeTaskName(task)}] cache hash: ${hash}; mode: ${mode}; entries: ${entries.collect(collector).join(' | ')}" + def buffer = new StringBuilder() + buffer.append("[${safeTaskName(task)}] cache hash: ${hash}; mode: $mode; entries: \n") + for( Object item : entries ) { + buffer.append( " ${CacheHelper.hasher(item, mode).hash()} [${item?.getClass()?.getName()}] $item \n") + } + + log.info(buffer.toString()) } protected Map getTaskGlobalVars(TaskRun task) { From 50c77978cbd52b754a6256b07cb008d73fb9710b Mon Sep 17 00:00:00 2001 From: Paolo Di Tommaso Date: Sun, 15 Oct 2023 15:56:48 +0200 Subject: [PATCH 3/4] Minor changes [ci fast] Signed-off-by: Paolo Di Tommaso --- .../groovy/nextflow/config/ConfigBuilder.groovy | 5 +---- .../nextflow/processor/TaskProcessor.groovy | 15 +++++++++------ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/config/ConfigBuilder.groovy b/modules/nextflow/src/main/groovy/nextflow/config/ConfigBuilder.groovy index 0c1995eb25..d30ad9e0dd 100644 --- a/modules/nextflow/src/main/groovy/nextflow/config/ConfigBuilder.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/config/ConfigBuilder.groovy @@ -597,10 +597,7 @@ class ConfigBuilder { // -- sets `dumpHashes` option if( cmdRun.dumpHashes ) { - if( cmdRun.dumpHashes != '-' ) - config.dumpHashes = cmdRun.dumpHashes - else - config.dumpHashes = 'default' + config.dumpHashes = cmdRun.dumpHashes != '-' ? cmdRun.dumpHashes : 'default' } if( cmdRun.dumpChannels ) diff --git a/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy b/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy index 8bc47fc7f8..884780a6bc 100644 --- a/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy @@ -2155,10 +2155,12 @@ class TaskProcessor { final mode = config.getHashMode() final hash = computeHash(keys, mode) - if( session.dumpHashes == 'json' ) - traceInputsHashesJson(task, keys, mode, hash) - else if( session.dumpHashes ) - traceInputsHashes(task, keys, mode, hash) + if( session.dumpHashes ) { + session.dumpHashes=='json' + ? traceInputsHashesJson(task, keys, mode, hash) + : traceInputsHashes(task, keys, mode, hash) + } + return hash } @@ -2194,12 +2196,13 @@ class TaskProcessor { } private void traceInputsHashesJson( TaskRun task, List entries, CacheHelper.HashMode mode, hash ) { - def collector = (item) -> [ + final collector = (item) -> [ hash: CacheHelper.hasher(item, mode).hash().toString(), type: item?.getClass()?.getName(), value: item?.toString() ] - log.info "[${safeTaskName(task)}] cache hash: ${hash}; mode: ${mode}; entries: ${JsonOutput.toJson(entries.collect(collector))}" + final json = JsonOutput.toJson(entries.collect(collector)) + log.info "[${safeTaskName(task)}] cache hash: ${hash}; mode: ${mode}; entries: ${JsonOutput.prettyPrint(json)}" } private void traceInputsHashes( TaskRun task, List entries, CacheHelper.HashMode mode, hash ) { From 4c437539a9adab9194133cf4ac081d0f688252ae Mon Sep 17 00:00:00 2001 From: Paolo Di Tommaso Date: Sun, 15 Oct 2023 15:58:52 +0200 Subject: [PATCH 4/4] Update modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy [ci skip] Signed-off-by: Paolo Di Tommaso --- .../src/main/groovy/nextflow/processor/TaskProcessor.groovy | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy b/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy index 884780a6bc..9977105775 100644 --- a/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy @@ -2160,7 +2160,6 @@ class TaskProcessor { ? traceInputsHashesJson(task, keys, mode, hash) : traceInputsHashes(task, keys, mode, hash) } - return hash }