-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Initial separation of washington state specific configs and rules
This separates the Washington-specific rules and configs from the new global workflow. Subsequent commits will tune the global rules, configs, and rooting. The Washington-specific workflow can be run via: nextstrain build phylogenetic --configfile build-configs/washington-state/config.yaml
- Loading branch information
Showing
6 changed files
with
112 additions
and
66 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,11 +1,37 @@ | ||
# This configuration file contains the custom configuration parameters | ||
# for the CI workflow to run with the example data. | ||
# for the Washington State phylogenetic build with custom rules and metadata | ||
|
||
# Pull in metadata and sequences from the example_data directory | ||
input_metadata: "example_data/metadata.tsv" | ||
input_sequences: "example_data/sequences.fasta" | ||
# Use 'NY99' as the reference since it should be basal to the USA sequences | ||
reference: "defaults/reference.gb" | ||
# Use 'IS88' as the root strain on the phylogenetic tree to place samples within the global context | ||
root: "AF481864" | ||
|
||
# Pull in metadata and sequences from the ingest directory after it has been annotated with washington-state specific metadata | ||
input_metadata: "../ingest/results/metadata.tsv" | ||
input_sequences: "../ingest/results/sequences.fasta" | ||
|
||
# This command excludes all strains by default and then forces the inclusion of | ||
# the strains selected by the subsampling logic defined above. | ||
subsampling: | ||
state: --query "state == 'WA'" --min-length '9800' --subsample-max-sequences 5000 | ||
neighboring_state: --query "state in ['CA', 'ID', 'OR', 'NV']" --group-by state year --min-length '9800' --subsample-max-sequences 5000 | ||
region: --query "state in ['AZ','NM', 'CO', 'UT', 'WY', 'MT']" --group-by state year --min-length '9800' --subsample-max-sequences 5000 | ||
country: --query "country == 'USA' and state not in ['WA', 'CA', 'ID', 'OR', 'NV','AZ','NM', 'CO', 'UT', 'WY', 'MT'] and accession != 'NC_009942'" --group-by state year --subsample-max-sequences 300 --min-length '9800' | ||
force_include: --exclude-all --include ../nextclade/defaults/include.txt | ||
|
||
traits: | ||
metadata_columns: [ | ||
'country', | ||
'division', | ||
'location', | ||
'clade_membership', | ||
'host' | ||
] | ||
|
||
export: | ||
auspice_config: "defaults/auspice_config.json" | ||
|
||
## Custom rules to run as part of the Washington State workflow | ||
## The paths should be relative to the phylogenetic directory. | ||
#custom_rules: | ||
# - build-configs/ci/copy_example_data.smk | ||
custom_rules: | ||
- build-configs/washington-state/washington-state-rules.smk |
17 changes: 0 additions & 17 deletions
17
phylogenetic/build-configs/washington-state/copy_example_data.smk
This file was deleted.
Oops, something went wrong.
77 changes: 77 additions & 0 deletions
77
phylogenetic/build-configs/washington-state/washington-state-rules.smk
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
""" | ||
These are Washington-specific rules for the phylogenetic workflow. | ||
""" | ||
|
||
rule create_lat_longs: | ||
""" | ||
Create an averaged results/lat_longs.tsv from results/metadata_filtered.tsv by running scripts/create_lat_longs.py. | ||
This requires a USA state annotation in the metadata, so the rule fails on global datasets. | ||
Both stdout and stderr of the script are written to the log file via tee. | ||
""" | ||
input: | ||
metadata = "results/metadata_filtered.tsv" | ||
output: | ||
lat_longs = "results/lat_longs.tsv" | ||
log: | ||
"logs/lat_longs.txt", | ||
benchmark: | ||
"benchmarks/lat_longs.txt" | ||
shell: | ||
""" | ||
python ./scripts/create_lat_longs.py {input.metadata} {output.lat_longs} 2>&1 | tee {log} | ||
""" | ||
|
||
|
||
rule create_colors: | ||
""" | ||
Create results/colors.tsv from results/metadata_filtered.tsv by running scripts/make_colors.py. | ||
Both stdout and stderr of the script are written to the log file via tee. | ||
""" | ||
input: | ||
metadata = "results/metadata_filtered.tsv" | ||
output: | ||
colors = "results/colors.tsv" | ||
log: | ||
"logs/colors.txt", | ||
benchmark: | ||
"benchmarks/colors.txt" | ||
shell: | ||
""" | ||
python ./scripts/make_colors.py {input.metadata} {output.colors} 2>&1 | tee {log} | ||
""" | ||
|
||
|
||
rule export_washington_build: | ||
""" | ||
This part of the workflow collects the phylogenetic tree and annotations to | ||
export a Nextstrain dataset with `augur export v2`, writing auspice/WNV_genome.json. | ||
This includes incorporating the results/lat_longs.tsv and results/colors.tsv annotations. | ||
The description and auspice_config inputs are read from config["export"]; | ||
metadata records are matched on the "accession" column. | ||
""" | ||
input: | ||
tree = "results/tree.nwk", | ||
metadata = "results/metadata_filtered.tsv", | ||
branch_lengths = "results/branch_lengths.json", | ||
traits = "results/traits.json", | ||
nt_muts = "results/nt_muts.json", | ||
aa_muts = "results/aa_muts.json", | ||
colors = "results/colors.tsv", | ||
description = config["export"]["description"], | ||
lat_longs = "results/lat_longs.tsv", | ||
auspice_config = config["export"]["auspice_config"], | ||
output: | ||
auspice = "auspice/WNV_genome.json" | ||
log: | ||
"logs/export.txt", | ||
benchmark: | ||
"benchmarks/export.txt" | ||
shell: | ||
""" | ||
augur export v2 \ | ||
--tree {input.tree} \ | ||
--metadata {input.metadata} \ | ||
--metadata-id-columns "accession" \ | ||
--node-data {input.branch_lengths} {input.traits} {input.nt_muts} {input.aa_muts} \ | ||
--colors {input.colors} \ | ||
--lat-longs {input.lat_longs} \ | ||
--description {input.description} \ | ||
--auspice-config {input.auspice_config} \ | ||
--output {output.auspice} 2>&1 | tee {log} | ||
""" | ||
|
||
# Snakemake ruleorder directive to resolve the ambiguity between this rule and | ||
# the `export` rule: prefer export_washington_build. | ||
ruleorder: export_washington_build > export |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters