From f4ee9703297b624d639a5a5c4cfdbe26dcd2dcc0 Mon Sep 17 00:00:00 2001 From: Jennifer Chang Date: Tue, 21 Jan 2025 10:32:56 -0800 Subject: [PATCH 1/5] Phylogenetic: Initialize lineage 1A and lineage 2 config file --- phylogenetic/defaults/lineage-1A/config.yaml | 85 ++++++++++++++++++++ phylogenetic/defaults/lineage-2/config.yaml | 85 ++++++++++++++++++++ 2 files changed, 170 insertions(+) create mode 100644 phylogenetic/defaults/lineage-1A/config.yaml create mode 100644 phylogenetic/defaults/lineage-2/config.yaml diff --git a/phylogenetic/defaults/lineage-1A/config.yaml b/phylogenetic/defaults/lineage-1A/config.yaml new file mode 100644 index 0000000..21a269f --- /dev/null +++ b/phylogenetic/defaults/lineage-1A/config.yaml @@ -0,0 +1,85 @@ +strain_id_field: "accession" +# Use 'Egypt 1951' as the reference and root, following Mencattelli et al, 2023 +# https://www.nature.com/articles/s41467-023-42185-7 +reference: "defaults/global/reference.gb" +root: "mid_point" + +# Sequences must be FASTA and metadata must be TSV +# Both files must be zstd compressed +sequences_url: "https://data.nextstrain.org/files/workflows/WNV/sequences.fasta.zst" +metadata_url: "https://data.nextstrain.org/files/workflows/WNV/metadata.tsv.zst" + +# Pull in metadata and sequences from the ingest workflow +input_metadata: "data/metadata.tsv" +input_sequences: "data/sequences.fasta" + +builds: ['global'] + +#subsampling: + #all: --min-length '9800' --query "country == 'USA' & accession != 'NC_009942'" + +# Define named subsampling groups below (e.g., "state", "country", "region", +# etc.). The workflow will run an `augur filter` command with the arguments +# defined by each named group. Each `augur filter` command operates on all +# available metadata and sequences and produces a text file containing the list +# of strain names that passed the filters. 
The workflow will collect the union +# of all strain names from the subsampling files and output the corresponding +# subset of metadata and sequences that will be used to build the phylogeny. +# +# As an example, we could define two named subsampling groups like the +# following: +# +# ``` +# subsampling: +# state: --query "division == 'WA'" --subsample-max-sequences 5000 +# neighboring_state: --query "division in ['CA', 'ID', 'OR', 'NV']" --subsample-max-sequences 5000 +# ``` +# +# These named subsampling groups will translate to the following two `augur filter` commands: +# +# ``` +# augur filter \ +# --sequences data/sequences_all.fasta \ +# --metadata data/metadata_all.tsv \ +# --query "division == 'WA'" --subsample-max-sequences 5000 \ +# --output-strains results/subsampled_strains_state.txt +# +# augur filter \ +# --sequences data/sequences_all.fasta \ +# --metadata data/metadata_all.tsv \ +# --query "division in ['CA', 'ID', 'OR', 'NV']" --subsample-max-sequences 5000 \ +# --output-strains results/subsampled_strains_neighboring_state.txt +# ``` +# +# Then, the workflow will collect the strains from each command to extract the +# corresponding metadata and sequences with the following command: +# +# ``` +# augur filter \ +# --sequences data/sequences_all.fasta \ +# --metadata data/metadata_all.tsv \ +# --exclude-all \ +# --include results/subsampled_strains_state.txt results/subsampled_strains_neighboring_state.txt \ +# --output-sequences results/sequences_filtered.fasta \ +# --output-metadata results/metadata_filtered.tsv +# ``` +# +# This command excludes all strains by default and then forces the inclusion of +# the strains selected by the subsampling logic defined above. 
+subsampling: + region: --query "is_lab_host != 'true'" --query-columns is_lab_host:str --min-length '8200' --group-by region year --subsample-max-sequences 3000 --exclude defaults/exclude.txt + force_include: --exclude-all --include defaults/include.txt + +refine: + treetime_params: --coalescent opt --date-inference marginal --date-confidence --keep-polytomies --clock-rate 0.000755 + +traits: + metadata_columns: [ + 'region', + 'country', + 'lineage', + ] + +export: + description: "defaults/description.md" + auspice_config: "defaults/global/auspice_config.json" diff --git a/phylogenetic/defaults/lineage-2/config.yaml b/phylogenetic/defaults/lineage-2/config.yaml new file mode 100644 index 0000000..21a269f --- /dev/null +++ b/phylogenetic/defaults/lineage-2/config.yaml @@ -0,0 +1,85 @@ +strain_id_field: "accession" +# Use 'Egypt 1951' as the reference and root, following Mencattelli et al, 2023 +# https://www.nature.com/articles/s41467-023-42185-7 +reference: "defaults/global/reference.gb" +root: "mid_point" + +# Sequences must be FASTA and metadata must be TSV +# Both files must be zstd compressed +sequences_url: "https://data.nextstrain.org/files/workflows/WNV/sequences.fasta.zst" +metadata_url: "https://data.nextstrain.org/files/workflows/WNV/metadata.tsv.zst" + +# Pull in metadata and sequences from the ingest workflow +input_metadata: "data/metadata.tsv" +input_sequences: "data/sequences.fasta" + +builds: ['global'] + +#subsampling: + #all: --min-length '9800' --query "country == 'USA' & accession != 'NC_009942'" + +# Define named subsampling groups below (e.g., "state", "country", "region", +# etc.). The workflow will run an `augur filter` command with the arguments +# defined by each named group. Each `augur filter` command operates on all +# available metadata and sequences and produces a text file containing the list +# of strain names that passed the filters. 
The workflow will collect the union +# of all strain names from the subsampling files and output the corresponding +# subset of metadata and sequences that will be used to build the phylogeny. +# +# As an example, we could define two named subsampling groups like the +# following: +# +# ``` +# subsampling: +# state: --query "division == 'WA'" --subsample-max-sequences 5000 +# neighboring_state: --query "division in ['CA', 'ID', 'OR', 'NV']" --subsample-max-sequences 5000 +# ``` +# +# These named subsampling groups will translate to the following two `augur filter` commands: +# +# ``` +# augur filter \ +# --sequences data/sequences_all.fasta \ +# --metadata data/metadata_all.tsv \ +# --query "division == 'WA'" --subsample-max-sequences 5000 \ +# --output-strains results/subsampled_strains_state.txt +# +# augur filter \ +# --sequences data/sequences_all.fasta \ +# --metadata data/metadata_all.tsv \ +# --query "division in ['CA', 'ID', 'OR', 'NV']" --subsample-max-sequences 5000 \ +# --output-strains results/subsampled_strains_neighboring_state.txt +# ``` +# +# Then, the workflow will collect the strains from each command to extract the +# corresponding metadata and sequences with the following command: +# +# ``` +# augur filter \ +# --sequences data/sequences_all.fasta \ +# --metadata data/metadata_all.tsv \ +# --exclude-all \ +# --include results/subsampled_strains_state.txt results/subsampled_strains_neighboring_state.txt \ +# --output-sequences results/sequences_filtered.fasta \ +# --output-metadata results/metadata_filtered.tsv +# ``` +# +# This command excludes all strains by default and then forces the inclusion of +# the strains selected by the subsampling logic defined above. 
+subsampling: + region: --query "is_lab_host != 'true'" --query-columns is_lab_host:str --min-length '8200' --group-by region year --subsample-max-sequences 3000 --exclude defaults/exclude.txt + force_include: --exclude-all --include defaults/include.txt + +refine: + treetime_params: --coalescent opt --date-inference marginal --date-confidence --keep-polytomies --clock-rate 0.000755 + +traits: + metadata_columns: [ + 'region', + 'country', + 'lineage', + ] + +export: + description: "defaults/description.md" + auspice_config: "defaults/global/auspice_config.json" From 54ebbf7b42cb5a4e519481ad9fb867fdfa330374 Mon Sep 17 00:00:00 2001 From: Jennifer Chang Date: Tue, 21 Jan 2025 10:45:28 -0800 Subject: [PATCH 2/5] Phylogenetic: Update sub-lineage specific configs and auxiliary files Update config values that are specific to lineage-1A and lineage-2 including * Adding auspice_config.json with different titles * Subsetting the include.txt to the individual lineages * Adding the lineage-2 reference.gb file from RefSeq * Clear any fixed clock value from the global build --- .../defaults/lineage-1A/auspice_config.json | 70 +++++ phylogenetic/defaults/lineage-1A/config.yaml | 87 +----- phylogenetic/defaults/lineage-1A/include.txt | 65 +++++ phylogenetic/defaults/lineage-1A/reference.gb | 257 +++++++++++++++++ .../defaults/lineage-2/auspice_config.json | 70 +++++ phylogenetic/defaults/lineage-2/config.yaml | 87 +----- phylogenetic/defaults/lineage-2/include.txt | 7 + phylogenetic/defaults/lineage-2/reference.gb | 270 ++++++++++++++++++ 8 files changed, 755 insertions(+), 158 deletions(-) create mode 100644 phylogenetic/defaults/lineage-1A/auspice_config.json create mode 100644 phylogenetic/defaults/lineage-1A/include.txt create mode 100644 phylogenetic/defaults/lineage-1A/reference.gb create mode 100644 phylogenetic/defaults/lineage-2/auspice_config.json create mode 100644 phylogenetic/defaults/lineage-2/include.txt create mode 100644
phylogenetic/defaults/lineage-2/reference.gb diff --git a/phylogenetic/defaults/lineage-1A/auspice_config.json b/phylogenetic/defaults/lineage-1A/auspice_config.json new file mode 100644 index 0000000..78c6c1f --- /dev/null +++ b/phylogenetic/defaults/lineage-1A/auspice_config.json @@ -0,0 +1,70 @@ +{ + "title": "West Nile Virus Build (lineage-1A)", + "data_provenance": [ + { + "name": "GenBank", + "url": "https://www.ncbi.nlm.nih.gov/genbank/" + } + ], + "colorings": [ + {"key": "gt", "title": "Genotype", "type": "categorical"}, + {"key": "num_date", "title": "Sampling Date", "type": "continuous"}, + {"key": "region", "title": "Region", "type": "categorical"}, + {"key": "country", "title": "Country", "type": "categorical"}, + {"key": "lineage", "title": "Pathoplexus lineage", "type": "categorical"}, + {"key": "clade_membership", "title": "Clade", "type": "categorical"}, + {"key": "author", "title": "Authors", "type": "categorical"}, + {"key": "host", "title": "Host Species", "type": "categorical"}, + {"key": "host_genus", "title": "Host Genus", "type": "categorical"}, + {"key": "host_type", "title": "Host Type", "type": "categorical"} + ], + "geo_resolutions": [ + "region", + "country" + ], + "maintainers": [ + {"name": "Marcela Torres", "url": "https://github.com/NW-PaGe/WNV-nextstrain"}, + {"name": "NW-PaGe", "url": "https://github.com/NW-PaGe/WNV-nextstrain"}, + {"name": "Nextstrain Team", "url": "https://next.nextstrain.org/"} + ], + "filters": [ + "region", + "country", + "division", + "author", + "host", + "host_genus", + "host_type" + ], + "display_defaults": { + "color_by": "region", + "map_triplicate": true, + "geo_resolution": "country", + "distance_measure": "div" + }, + "metadata_columns": [ + "accession", + "division", + "url" + ], + "extensions": { + "nextclade": { + "clade_node_attrs": [ + { + "name": "lineage", + "displayName": "Pathoplexus lineage", + "description": "Global WNV lineages based on pathoplexus results." 
+ } + ], + "pathogen": { + "schemaVersion":"3.0.0", + "defaultCds": "env", + "attributes": { + "name": "West Nile Virus Global Tree", + "reference name": "Reconstructed ancestor from global tree", + "reference accession": "none" + } + } + } + } +} diff --git a/phylogenetic/defaults/lineage-1A/config.yaml b/phylogenetic/defaults/lineage-1A/config.yaml index 21a269f..429d004 100644 --- a/phylogenetic/defaults/lineage-1A/config.yaml +++ b/phylogenetic/defaults/lineage-1A/config.yaml @@ -1,85 +1,14 @@ -strain_id_field: "accession" -# Use 'Egypt 1951' as the reference and root, following Mencattelli et al, 2023 -# https://www.nature.com/articles/s41467-023-42185-7 -reference: "defaults/global/reference.gb" -root: "mid_point" +reference: "defaults/lineage-1A/reference.gb" +root: "best" +builds: ['lineage-1A'] -# Sequences must be FASTA and metadata must be TSV -# Both files must be zstd compressed -sequences_url: "https://data.nextstrain.org/files/workflows/WNV/sequences.fasta.zst" -metadata_url: "https://data.nextstrain.org/files/workflows/WNV/metadata.tsv.zst" - -# Pull in metadata and sequences from the ingest workflow -input_metadata: "data/metadata.tsv" -input_sequences: "data/sequences.fasta" - -builds: ['global'] - -#subsampling: - #all: --min-length '9800' --query "country == 'USA' & accession != 'NC_009942'" - -# Define named subsampling groups below (e.g., "state", "country", "region", -# etc.). The workflow will run an `augur filter` command with the arguments -# defined by each named group. Each `augur filter` command operates on all -# available metadata and sequences and produces a text file containing the list -# of strain names that passed the filters. The workflow will collect the union -# of all strain names from the subsampling files and output the corresponding -# subset of metadata and sequences that will be used to build the phylogeny. 
-# -# As an example, we could define two named subsampling groups like the -# following: -# -# ``` -# subsampling: -# state: --query "division == 'WA'" --subsample-max-sequences 5000 -# neighboring_state: --query "division in ['CA', 'ID', 'OR', 'NV']" --subsample-max-sequences 5000 -# ``` -# -# These named subsampling groups will translate to the following two `augur filter` commands: -# -# ``` -# augur filter \ -# --sequences data/sequences_all.fasta \ -# --metadata data/metadata_all.tsv \ -# --query "division == 'WA'" --subsample-max-sequences 5000 \ -# --output-strains results/subsampled_strains_state.txt -# -# augur filter \ -# --sequences data/sequences_all.fasta \ -# --metadata data/metadata_all.tsv \ -# --query "division in ['CA', 'ID', 'OR', 'NV']" --subsample-max-sequences 5000 \ -# --output-strains results/subsampled_strains_neighboring_state.txt -# ``` -# -# Then, the workflow will collect the strains from each command to extract the -# corresponding metadata and sequences with the following command: -# -# ``` -# augur filter \ -# --sequences data/sequences_all.fasta \ -# --metadata data/metadata_all.tsv \ -# --exclude-all \ -# --include results/subsampled_strains_state.txt results/subsampled_strains_neighboring_state.txt \ -# --output-sequences results/sequences_filtered.fasta \ -# --output-metadata results/metadata_filtered.tsv -# ``` -# -# This command excludes all strains by default and then forces the inclusion of -# the strains selected by the subsampling logic defined above. 
subsampling: - region: --query "is_lab_host != 'true'" --query-columns is_lab_host:str --min-length '8200' --group-by region year --subsample-max-sequences 3000 --exclude defaults/exclude.txt - force_include: --exclude-all --include defaults/include.txt + region: --query "is_lab_host != 'true' & lineage == '1A'" --query-columns is_lab_host:str --min-length '8200' --group-by region year --subsample-max-sequences 3000 --exclude defaults/exclude.txt + force_include: --exclude-all --include defaults/lineage-1A/include.txt +# Clock rate from Table 1 of May et al, 2010: https://pmc.ncbi.nlm.nih.gov/articles/PMC3067944/ refine: - treetime_params: --coalescent opt --date-inference marginal --date-confidence --keep-polytomies --clock-rate 0.000755 - -traits: - metadata_columns: [ - 'region', - 'country', - 'lineage', - ] + treetime_params: --coalescent opt --date-inference marginal --date-confidence --keep-polytomies --clock-rate 0.00106 export: - description: "defaults/description.md" - auspice_config: "defaults/global/auspice_config.json" + auspice_config: "defaults/lineage-1A/auspice_config.json" diff --git a/phylogenetic/defaults/lineage-1A/include.txt b/phylogenetic/defaults/lineage-1A/include.txt new file mode 100644 index 0000000..a674b7f --- /dev/null +++ b/phylogenetic/defaults/lineage-1A/include.txt @@ -0,0 +1,65 @@ +NC_009942 # Lineage 1 reference +AF481864 # pre-NY +MH166901 # NY99 +MH166903 # NY99 +MH166904 # NY99 +KX547395 # NY99 +KX547519 # NY99 +KX547602 # NY99 +HM488130 # NY99 +HM488132 # NY99 +HQ671707 # NY99 +AF202541 # NY99 +AF206518 # NY99 +HM488127 # NY99 +HM488126 # NY99 +KX547410 # WN02 +KJ501434 # WN02 +KX547456 # WN02 +KY216155 # WN02 +KX547460 # WN02 +MF175829 # WN02 +KX547482 # WN02 +MF175827 # WN02 +MF175839 # WN02 +KT020853 # WN02 +KX547548 # WN02 +MF175863 # WN02 +KX547286 # WN02 +MF175873 # WN02 +MF175865 # WN02 +MF175831 # WN02 +MF175858 # WN02 +KJ501117 # SW03 +KJ501120 # SW03 +MF175815 # SW03 +MG004533 # SW03 +KF704147 # SW03 +KF704153 # 
SW03 +KR348940 # SW03 +KR348937 # SW03 +KX547361 # SW03 +JX015523 # SW03 +KR348944 # SW03 +KJ501124 # SW03 +KX547552 # SW03 +KJ145829 # SW03 +KR348981 # SW03 +KJ501118 # SW03 +KR348938 # SW03 +KR348976 # SW03 +KJ501170 # SW03 +KR348993 # SW03 +JQ700438 # SW03 +KR348977 # SW03 +KR348942 # SW03 +KR348941 # SW03 +KJ501121 # SW03 +KJ501122 # SW03 +KX547375 # SW03 +KM012172 # SW03 +KC333375 # SW03 +KJ501222 # SW03 +MG004537 # SW03 +MF175866 # SW03 +MG004540 # SW03 diff --git a/phylogenetic/defaults/lineage-1A/reference.gb b/phylogenetic/defaults/lineage-1A/reference.gb new file mode 100644 index 0000000..9023c75 --- /dev/null +++ b/phylogenetic/defaults/lineage-1A/reference.gb @@ -0,0 +1,257 @@ +LOCUS NC_009942_REF 11029 bp RNA linear VRL 24-AUG-2016 +DEFINITION West Nile virus lineage 1, complete genome. +ACCESSION NC_009942_REF +VERSION NC_009942.1 +DBLINK BioProject: PRJNA30293 +KEYWORDS RefSeq. +SOURCE West Nile virus (WNV) + ORGANISM West Nile virus + Viruses; ssRNA viruses; ssRNA positive-strand viruses, no DNA + stage; Flaviviridae; Flavivirus; Japanese encephalitis virus group. +REFERENCE 1 (sites) + AUTHORS Faggioni,G., Pomponi,A., De Santis,R., Masuelli,L., + Ciammaruconi,A., Monaco,F., Di Gennaro,A., Marzocchella,L., + Sambri,V., Lelli,R., Rezza,G., Bei,R. and Lista,F. + TITLE West Nile alternative open reading frame (N-NS4B/WARF4) is produced + in infected West Nile Virus (WNV) cells and induces humoral + response in WNV infected individuals + JOURNAL Virol. J. 9, 283 (2012) + PUBMED 23173701 + REMARK Publication Status: Online-Only +REFERENCE 2 (bases 1 to 11029) + AUTHORS Borisevich,V., Seregin,A., Nistler,R., Mutabazi,D. and + Yamshchikov,V. 
+ TITLE Biological properties of chimeric West Nile viruses + JOURNAL Virology 349 (2), 371-381 (2006) + PUBMED 16545851 +REFERENCE 3 (bases 1 to 11029) + CONSRTM NCBI Genome Project + TITLE Direct Submission + JOURNAL Submitted (19-OCT-2007) National Center for Biotechnology + Information, NIH, Bethesda, MD 20894, USA +REFERENCE 4 (bases 1 to 11029) + AUTHORS Borisevich,V., Seregin,A. and Yamshchikov,V. + TITLE Direct Submission + JOURNAL Submitted (19-SEP-2005) Molecular Biosciences, University of + Kansas, 1200 Sunnyside Ave, Lawrence, KS 66045, USA. +COMMENT VALIDATED REFSEQ: This record has undergone validation or + preliminary review. The reference sequence was derived from + DQ211652. + COMPLETENESS: full length. +FEATURES Location/Qualifiers + source 1..11029 + /organism="West Nile virus" + /mol_type="genomic RNA" + /strain="NY99" + /isolate="385-99" + /db_xref="taxon:11082" + /country="USA" + /note="lineage 1; Vero cell passage 2 after isolation" + CDS 97..465 + /gene="capsid" + CDS 466..966 + /gene="prM" + CDS 967..2469 + /gene="env" + CDS 2470..3525 + /gene="NS1" + CDS 3526..4218 + /gene="NS2A" + CDS 4219..4611 + /gene="NS2B" + CDS 4612..6468 + /gene="NS3" + CDS 6469..6846 + /gene="NS4A" + CDS 6847..6915 + /gene="2K" + CDS 6916..7680 + /gene="NS4B" + CDS 7681..10395 + /gene="NS5" +ORIGIN + 1 agtagttcgc ctgtgtgagc tgacaaactt agtagtgttt gtgaggatta acaacaatta + 61 acacagtgcg agctgtttct tagcacgaag atctcgatgt ctaagaaacc aggagggccc + 121 ggcaagagcc gggctgtcaa tatgctaaaa cgcggaatgc cccgcgtgtt gtccttgatt + 181 ggactgaaga gggctatgtt gagcctgatc gacggcaagg ggccaatacg atttgtgttg + 241 gctctcttgg cgttcttcag gttcacagca attgctccga cccgagcagt gctggatcga + 301 tggagaggtg tgaacaaaca aacagcgatg aaacaccttc tgagttttaa gaaggaacta + 361 gggaccttga ccagtgctat caatcggcgg agctcaaaac aaaagaaaag aggaggaaag + 421 accggaattg cagtcatgat tggcctgatc gccagcgtag gagcagttac cctctctaac + 481 ttccaaggga aggtgatgat gacggtaaat gctactgacg tcacagatgt catcacgatt + 541 ccaacagctg ctggaaagaa 
cctatgcatt gtcagagcaa tggatgtggg atacatgtgc + 601 gatgatacta tcacttatga atgcccagta ctgtcggctg gtaatgatcc agaagacatc + 661 gactgttggt gcacaaagtc agcagtctac gtcaggtatg gaagatgcac caagacacgc + 721 cactcaagac gcagtcggag gtcactgaca gtgcagacac acggagaaag cactctagcg + 781 aacaagaagg gggcttggat ggacagcacc aaggccacaa ggtatttggt aaaaacagaa + 841 tcatggatct tgaggaaccc tggatatgcc ctggtggcag ccgtcattgg ttggatgctt + 901 gggagcaaca ccatgcagag agttgtgttt gtcgtgctat tgcttttggt ggccccagct + 961 tacagcttca actgccttgg aatgagcaac agagacttct tggaaggagt gtctggagca + 1021 acatgggtgg atttggttct cgaaggcgac agctgcgtga ctatcatgtc taaggacaag + 1081 cctaccatcg atgtgaagat gatgaatatg gaggcggcca acctggcaga ggtccgcagt + 1141 tattgctatt tggctaccgt cagcgatctc tccaccaaag ctgcgtgccc gaccatggga + 1201 gaagctcaca atgacaaacg tgctgaccca gcttttgtgt gcagacaagg agtggtggac + 1261 aggggctggg gcaacggctg cggactattt ggcaaaggaa gcattgacac atgcgccaaa + 1321 tttgcctgct ctaccaaggc aataggaaga accatcttga aagagaatat caagtacgaa + 1381 gtggccattt ttgtccatgg accaactact gtggagtcgc acggaaacta ctccacacag + 1441 gttggagcca ctcaggcagg gagactcagc atcactcctg cggcgccttc atacacacta + 1501 aagcttggag aatatggaga ggtgacagtg gactgtgaac cacggtcagg gattgacacc + 1561 aatgcatact acgtgatgac tgttggaaca aagacgttct tggtccatcg tgagtggttc + 1621 atggacctca acctcccttg gagcagtgct ggaagtactg tgtggaggaa cagagagacg + 1681 ttaatggagt ttgaggaacc acacgccacg aagcagtctg tgatagcatt gggctcacaa + 1741 gagggagctc tgcatcaagc tttggctgga gccattcctg tggaattttc aagcaacact + 1801 gtcaagttga cgtcgggtca tttgaagtgt agagtgaaga tggaaaaatt gcagttgaag + 1861 ggaacaacct atggcgtctg ttcaaaggct ttcaagtttc ttgggactcc cgcagacaca + 1921 ggtcacggca ctgtggtgtt ggaattgcag tacactggca cggatggacc ttgcaaagtt + 1981 cctatctcgt cagtggcttc attgaacgac ctaacgccag tgggcagatt ggtcactgtc + 2041 aacccttttg tttcagtggc cacggccaac gctaaggtcc tgattgaatt ggaaccaccc + 2101 tttggagact catacatagt ggtgggcaga ggagaacaac agatcaatca ccattggcac + 2161 aagtctggaa gcagcattgg caaagccttt acaaccaccc tcaaaggagc 
gcagagacta + 2221 gccgctctag gagacacagc ttgggacttt ggatcagttg gaggggtgtt cacctcagtt + 2281 gggaaggctg tccatcaagt gttcggagga gcattccgct cactgttcgg aggcatgtcc + 2341 tggataacgc aaggattgct gggggctctc ctgttgtgga tgggcatcaa tgctcgtgat + 2401 aggtccatag ctctcacgtt tctcgcagtt ggaggagttc tgctcttcct ctccgtgaac + 2461 gtgcacgctg acactgggtg tgccatagac atcagccggc aagagctgag atgtggaagt + 2521 ggagtgttca tacacaatga tgtggaggct tggatggacc ggtacaagta ttaccctgaa + 2581 acgccacaag gcctagccaa gatcattcag aaagctcata aggaaggagt gtgcggtcta + 2641 cgatcagttt ccagactgga gcatcaaatg tgggaagcag tgaaggacga gctgaacact + 2701 cttttgaagg agaatggtgt ggaccttagt gtcgtggttg agaaacagga gggaatgtac + 2761 aagtcagcac ctaaacgcct caccgccacc acggaaaaat tggaaattgg ctggaaggcc + 2821 tggggaaaga gtattttatt tgcaccagaa ctcgccaaca acacctttgt ggttgatggt + 2881 ccggagacca aggaatgtcc gactcagaat cgcgcttgga atagcttaga agtggaggat + 2941 tttggatttg gtctcaccag cactcggatg ttcctgaagg tcagagagag caacacaact + 3001 gaatgtgact cgaagatcat tggaacggct gtcaagaaca acttggcgat ccacagtgac + 3061 ctgtcctatt ggattgaaag caggctcaat gatacgtgga agcttgaaag ggcagttctg + 3121 ggtgaagtca aatcatgtac gtggcctgag acgcatacct tgtggggcga tggaatcctt + 3181 gagagtgact tgataatacc agtcacactg gcgggaccac gaagcaatca caatcggaga + 3241 cctgggtaca agacacaaaa ccagggccca tgggacgaag gccgggtaga gattgacttc + 3301 gattactgcc caggaactac ggtcaccctg agtgagagct gcggacaccg tggacctgcc + 3361 actcgcacca ccacagagag cggaaagttg ataacagatt ggtgctgcag gagctgcacc + 3421 ttaccaccac tgcgctacca aactgacagc ggctgttggt atggtatgga gatcagacca + 3481 cagagacatg atgaaaagac cctcgtgcag tcacaagtga atgcttataa tgctgatatg + 3541 attgaccctt ttcagttggg ccttctggtc gtgttcttgg ccacccagga ggtccttcgc + 3601 aagaggtgga cagccaagat cagcatgcca gctatactga ttgctctgct agtcctggtg + 3661 tttgggggca ttacttacac tgatgtgtta cgctatgtca tcttggtggg ggcagctttc + 3721 gcagaatcta attcgggagg agacgtggta cacttggcgc tcatggcgac cttcaagata + 3781 caaccagtgt ttatggtggc atcgtttctc aaagcgagat ggaccaacca ggagaacatt + 3841 ttgttgatgt 
tggcggctgt tttctttcaa atggcttatc acgatgcccg ccaaattctg + 3901 ctctgggaga tccctgatgt gttgaattca ctggcggtag cttggatgat actgagagcc + 3961 ataacattca caacgacatc aaacgtggtt gttccgctgc tagccctgct aacacccggg + 4021 ctgagatgct tgaatctgga tgtgtacagg atactgctgt tgatggtcgg aataggcagc + 4081 ttgatcaggg agaagaggag tgcagccgca aaaaagaaag gagcaagtct gctatgcttg + 4141 gctctagcct caacaggact tttcaacccc atgatccttg ctgctggact gattgcatgt + 4201 gatcccaacc gtaaacgcgg atggcccgca actgaagtga tgacagctgt cggcctaatg + 4261 tttgccatcg tcggagggct ggcagagctt gacattgact ccatggccat tccaatgact + 4321 atcgcggggc tcatgtttgc tgctttcgtg atttctggga aatcaacaga tatgtggatt + 4381 gagagaacgg cggacatttc ctgggaaagt gatgcagaaa ttacaggctc gagcgaaaga + 4441 gttgatgtgc ggcttgatga tgatggaaac ttccagctca tgaatgatcc aggagcacct + 4501 tggaagatat ggatgctcag aatggtctgt ctcgcgatta gtgcgtacac cccctgggca + 4561 atcttgccct cagtagttgg attttggata actctccaat acacaaagag aggaggcgtg + 4621 ttgtgggaca ctccctcacc aaaggagtac aaaaaggggg acacgaccac cggcgtctac + 4681 aggatcatga ctcgtgggct gctcggcagt tatcaagcag gagcgggcgt gatggttgaa + 4741 ggtgttttcc acaccctttg gcatacaaca aaaggagccg ctttgatgag cggagagggc + 4801 cgcctggacc catactgggg cagtgtcaag gaggatcgac tttgttacgg aggaccctgg + 4861 aaattgcagc acaagtggaa cgggcaggat gaggtgcaga tgattgtggt ggaacctggc + 4921 aagaacgtta agaacgtcca gacgaaacca ggggtgttca aaacacctga aggagaaatc + 4981 ggggccgtga ctttggactt ccccactgga acatcaggct caccaatagt ggacaaaaac + 5041 ggtgatgtga ttgggcttta tggcaatgga gtcataatgc ccaacggctc atacataagc + 5101 gcgatagtgc agggtgaaag gatggatgag ccaatcccag ccggattcga acctgagatg + 5161 ctgaggaaaa aacagatcac tgtactggat ctccatcccg gcgccggtaa aacaaggagg + 5221 attctgccac agatcatcaa agaggccata aacagaagac tgagaacagc cgtgctagcg + 5281 ccaaccaggg ttgtggctgc tgagatggct gaagcactga gaggactgcc catccggtac + 5341 cagacatccg cagtgcccag agaacataat ggaaatgaga ttgttgatgt catgtgtcat + 5401 gctaccctca cccacaggct gatgtctcct cacagggtgc cgaactacaa cctgttcgtg + 5461 atggatgagg ctcatttcac cgacccagct 
agcattgcag caagaggtta catttccaca + 5521 aaggtcgagc taggggaggc ggcggcaata ttcatgacag ccaccccacc aggcacttca + 5581 gatccattcc cagagtccaa ttcaccaatt tccgacttac agactgagat cccggatcga + 5641 gcttggaact ctggatacga atggatcaca gaatacaccg ggaagacggt ttggtttgtg + 5701 cctagtgtca agatggggaa tgagattgcc ctttgcctac aacgtgctgg aaagaaagta + 5761 gtccaattga acagaaagtc gtacgagacg gagtacccaa aatgtaagaa cgatgattgg + 5821 gactttgtta tcacaacaga catatctgaa atgggggcta acttcaaggc gagcagggtg + 5881 attgacagcc ggaagagtgt gaaaccaacc atcataacag aaggagaagg gagagtgatc + 5941 ctgggagaac catctgcagt gacagcagct agtgccgccc agagacgtgg acgtatcggt + 6001 agaaatccgt cgcaagttgg tgatgagtac tgttatgggg ggcacacgaa tgaagacgac + 6061 tcgaacttcg cccattggac tgaggcacga atcatgctgg acaacatcaa catgccaaac + 6121 ggactgatcg ctcaattcta ccaaccagag cgtgagaagg tatataccat ggatggggaa + 6181 taccggctca gaggagaaga gagaaaaaac tttctggaac tgttgaggac tgcagatctg + 6241 ccagtttggc tggcttacaa ggttgcagcg gctggagtgt cataccacga ccggaggtgg + 6301 tgctttgatg gtcctaggac aaacacaatt ttagaagaca acaacgaagt ggaagtcatc + 6361 acgaagcttg gtgaaaggaa gattctgagg ccgcgctgga ttgatgccag ggtgtactcg + 6421 gatcaccagg cactaaaggc gttcaaggac ttcgcctcgg gaaaacgttc tcagataggg + 6481 ctcattgagg ttctgggaaa gatgcctgag cacttcatgg ggaagacatg ggaagcactt + 6541 gacaccatgt acgttgtggc cactgcagag aaaggaggaa gagctcacag aatggccctg + 6601 gaggaactgc cagatgctct tcagacaatt gccttgattg ccttattgag tgtgatgacc + 6661 atgggagtat tcttcctcct catgcagcgg aagggcattg gaaagatagg tttgggaggc + 6721 gctgtcttgg gagtcgcgac ctttttctgt tggatggctg aagttccagg aacgaagatc + 6781 gccggaatgt tgctgctctc ccttctcttg atgattgtgc taattcctga gccagagaag + 6841 caacgttcgc agacagacaa ccagctagcc gtgttcctga tttgtgtcat gacccttgtg + 6901 agcgcagtgg cagccaacga gatgggttgg ctagataaga ccaagagtga cataagcagt + 6961 ttgtttgggc aaagaattga ggtcaaggag aatttcagca tgggagagtt tcttctggac + 7021 ttgaggccgg caacagcctg gtcactgtac gctgtgacaa cagcggtcct cactccactg + 7081 ctaaagcatt tgatcacgtc agattacatc aacacctcat tgacctcaat 
aaacgttcag + 7141 gcaagtgcac tattcacact cgcgcgaggc ttccccttcg tcgatgttgg agtgtcggct + 7201 ctcctgctag cagccggatg ctggggacaa gtcaccctca ccgttacggt aacagcggca + 7261 acactccttt tttgccacta tgcctacatg gttcccggtt ggcaagctga ggcaatgcgc + 7321 tcagcccagc ggcggacagc ggccggaatc atgaagaacg ctgtagtgga tggcatcgtg + 7381 gccacggacg tcccagaatt agagcgcacc acacccatca tgcagaagaa agttggacag + 7441 atcatgctga tcttggtgtc tctagctgca gtagtagtga acccgtctgt gaagacagta + 7501 cgagaagccg gaattttgat cacggccgca gcggtgacgc tttgggagaa tggagcaagc + 7561 tctgtttgga acgcaacaac tgccatcgga ctctgccaca tcatgcgtgg gggttggttg + 7621 tcatgtctat ccataacatg gacactcata aagaacatgg aaaaaccagg actaaaaaga + 7681 ggtggggcaa aaggacgcac cttgggagag gtttggaaag aaagactcaa ccagatgaca + 7741 aaagaagagt tcactaggta ccgcaaagag gccatcatcg aagtcgatcg ctcagcggca + 7801 aaacacgcca ggaaagaagg caatgtcact ggagggcatc cagtctctag gggcacagca + 7861 aaactgagat ggctggtcga acggaggttt ctcgaaccgg tcggaaaagt gattgacctt + 7921 ggatgtggaa gaggcggttg gtgttactat atggcaaccc aaaaaagagt ccaagaagtc + 7981 agagggtaca caaagggcgg tcccggacat gaagagcccc aactagtgca aagttatgga + 8041 tggaacattg tcaccatgaa gagtggagtg gatgtgttct acagaccttc tgagtgttgt + 8101 gacaccctcc tttgtgacat cggagagtcc tcgtcaagtg ctgaggttga agagcatagg + 8161 acgattcggg tccttgaaat ggttgaggac tggctgcacc gagggccaag ggaattttgc + 8221 gtgaaggtgc tctgtcccta catgccgaaa gtcatagaga agatggagct gctccaacgc + 8281 cggtatgggg ggggactggt cagaaaccca ctctcacgga attccacgca cgagatgtat + 8341 tgggtgagtc gagcttcagg caatgtggta cattcagtga atatgaccag ccaggtgctc + 8401 ctaggaagaa tggaaaaaag gacctggaag ggaccccaat acgaggaaga tgtaaacttg + 8461 ggaagtggaa ccagggcggt gggaaaaccc ctgctcaact cagacaccag taaaatcaag + 8521 aacaggattg aacgactcag gcgtgagtac agttcgacgt ggcaccacga tgagaaccac + 8581 ccatatagaa cctggaacta tcacggcagt tatgatgtga agcccacagg ctccgccagt + 8641 tcgctggtca atggagtggt caggctcctc tcaaaaccat gggacaccat cacgaatgtt + 8701 accaccatgg ccatgactga cactactccc ttcgggcagc agcgagtgtt caaagagaag + 8761 gtggacacga 
aagctcctga accgccagaa ggagtgaagt acgtgctcaa cgagaccacc + 8821 aactggttgt gggcgttttt ggccagagaa aaacgtccca gaatgtgctc tcgagaggaa + 8881 ttcataagaa aggtcaacag caatgcagct ttgggtgcca tgtttgaaga gcagaatcaa + 8941 tggaggagcg ccagagaagc agttgaagat ccaaaatttt gggagatggt ggatgaggag + 9001 cgcgaggcac atctgcgggg ggaatgtcac acttgcattt acaacatgat gggaaagaga + 9061 gagaaaaaac ccggagagtt cggaaaggcc aagggaagca gagccatttg gttcatgtgg + 9121 ctcggagctc gctttctgga gttcgaggct ctgggttttc tcaatgaaga ccactggctt + 9181 ggaagaaaga actcaggagg aggtgtcgag ggcttgggcc tccaaaaact gggttacatc + 9241 ctgcgtgaag ttggcacccg gcctgggggc aagatctatg ctgatgacac agctggctgg + 9301 gacacccgca tcacgagagc tgacttggaa aatgaagcta aggtgcttga gctgcttgat + 9361 ggggaacatc ggcgtcttgc cagggccatc attgagctca cctatcgtca caaagttgtg + 9421 aaagtgatgc gcccggctgc tgatggaaga accgtcatgg atgttatctc cagagaagat + 9481 cagaggggga gtggacaagt tgtcacctac gccctaaaca ctttcaccaa cctggccgtc + 9541 cagctggtga ggatgatgga aggggaagga gtgattggcc cagatgatgt ggagaaactc + 9601 acaaaaggga aaggacccaa agtcaggacc tggctgtttg agaatgggga agaaagactc + 9661 agccgcatgg ctgtcagtgg agatgactgt gtggtaaagc ccctggacga tcgctttgcc + 9721 acctcgctcc acttcctcaa tgctatgtca aaggttcgca aagacatcca agagtggaaa + 9781 ccgtcaactg gatggtatga ttggcagcag gttccatttt gctcaaacca tttcactgaa + 9841 ttgatcatga aagatggaag aacactggtg gttccatgcc gaggacagga tgaattggta + 9901 ggcagagctc gcatatctcc aggggccgga tggaacgtcc gcgacactgc ttgtctggct + 9961 aagtcttatg cccagatgtg gctgcttctg tacttccaca gaagagacct gcggctcatg + 10021 gccaacgcca tttgctccgc tgtccctgtg aattgggtcc ctaccggaag aaccacgtgg + 10081 tccatccatg caggaggaga gtggatgaca acagaggaca tgttggaggt ctggaaccgt + 10141 gtttggatag aggagaatga atggatggaa gacaaaaccc cagtggagaa atggagtgac + 10201 gtcccatatt caggaaaacg agaggacatc tggtgtggca gcctgattgg cacaagagcc + 10261 cgagccacgt gggcagaaaa catccaggtg gctatcaacc aagtcagagc aatcatcgga + 10321 gatgagaagt atgtggacta catgagttca ctaaagagat atgaagacac aactttggtt + 10381 gaggacacag tactgtagat atttaatcaa 
ttgtaaatag acaatataag tatgcataaa + 10441 agtgtagttt tatagtagta tttagtggtg ttagtgtaaa tagttaagaa aattttgagg + 10501 agaaagtcag gccgggaagt tcccgccacc ggaagttgag tagacggtgc tgcctgcgac + 10561 tcaaccccag gaggactggg tgaacaaagc cgcgaagtga tccatgtaag ccctcagaac + 10621 cgtctcggaa ggaggacccc acatgttgta acttcaaagc ccaatgtcag accacgctac + 10681 ggcgtgctac tctgcggaga gtgcagtctg cgatagtgcc ccaggaggac tgggttaaca + 10741 aaggcaaacc aacgccccac gcggccctag ccccggtaat ggtgttaacc agggcgaaag + 10801 gactagaggt tagaggagac cccgcggttt aaagtgcacg gcccagcctg gctgaagctg + 10861 taggtcaggg gaaggactag aggttagtgg agaccccgtg ccacaaaaca ccacaacaaa + 10921 acagcatatt gacacctggg atagactagg agatcttctg ctctgcacaa ccagccacac + 10981 ggcacagtgc gccgacaatg gtggctggtg gtgcgagaac acaggatct +// diff --git a/phylogenetic/defaults/lineage-2/auspice_config.json b/phylogenetic/defaults/lineage-2/auspice_config.json new file mode 100644 index 0000000..92881a4 --- /dev/null +++ b/phylogenetic/defaults/lineage-2/auspice_config.json @@ -0,0 +1,70 @@ +{ + "title": "West Nile Virus Build (lineage-2)", + "data_provenance": [ + { + "name": "GenBank", + "url": "https://www.ncbi.nlm.nih.gov/genbank/" + } + ], + "colorings": [ + {"key": "gt", "title": "Genotype", "type": "categorical"}, + {"key": "num_date", "title": "Sampling Date", "type": "continuous"}, + {"key": "region", "title": "Region", "type": "categorical"}, + {"key": "country", "title": "Country", "type": "categorical"}, + {"key": "lineage", "title": "Pathoplexus lineage", "type": "categorical"}, + {"key": "clade_membership", "title": "Clade", "type": "categorical"}, + {"key": "author", "title": "Authors", "type": "categorical"}, + {"key": "host", "title": "Host Species", "type": "categorical"}, + {"key": "host_genus", "title": "Host Genus", "type": "categorical"}, + {"key": "host_type", "title": "Host Type", "type": "categorical"} + ], + "geo_resolutions": [ + "region", + "country" + ], + "maintainers": [ + {"name": "Marcela Torres", "url": 
"https://github.com/NW-PaGe/WNV-nextstrain"}, + {"name": "NW-PaGe", "url": "https://github.com/NW-PaGe/WNV-nextstrain"}, + {"name": "Nextstrain Team", "url": "https://next.nextstrain.org/"} + ], + "filters": [ + "region", + "country", + "division", + "author", + "host", + "host_genus", + "host_type" + ], + "display_defaults": { + "color_by": "region", + "map_triplicate": true, + "geo_resolution": "country", + "distance_measure": "div" + }, + "metadata_columns": [ + "accession", + "division", + "url" + ], + "extensions": { + "nextclade": { + "clade_node_attrs": [ + { + "name": "lineage", + "displayName": "Pathoplexus lineage", + "description": "Lineage 2 WNV lineages based on pathoplexus results." + } + ], + "pathogen": { + "schemaVersion":"3.0.0", + "defaultCds": "env", + "attributes": { + "name": "West Nile Virus Lineage 2 Tree", + "reference name": "Reconstructed ancestor from global tree", + "reference accession": "none" + } + } + } + } +} diff --git a/phylogenetic/defaults/lineage-2/config.yaml b/phylogenetic/defaults/lineage-2/config.yaml index 21a269f..8c04355 100644 --- a/phylogenetic/defaults/lineage-2/config.yaml +++ b/phylogenetic/defaults/lineage-2/config.yaml @@ -1,85 +1,14 @@ -strain_id_field: "accession" -# Use 'Egypt 1951' as the reference and root, following Mencattelli et al, 2023 -# https://www.nature.com/articles/s41467-023-42185-7 -reference: "defaults/global/reference.gb" -root: "mid_point" +reference: "defaults/lineage-2/reference.gb" +root: "best" +builds: ['lineage-2'] -# Sequences must be FASTA and metadata must be TSV -# Both files must be zstd compressed -sequences_url: "https://data.nextstrain.org/files/workflows/WNV/sequences.fasta.zst" -metadata_url: "https://data.nextstrain.org/files/workflows/WNV/metadata.tsv.zst" - -# Pull in metadata and sequences from the ingest workflow -input_metadata: "data/metadata.tsv" -input_sequences: "data/sequences.fasta" - -builds: ['global'] - -#subsampling: - #all: --min-length '9800' --query "country 
== 'USA' & accession != 'NC_009942'" - -# Define named subsampling groups below (e.g., "state", "country", "region", -# etc.). The workflow will run an `augur filter` command with the arguments -# defined by each named group. Each `augur filter` command operates on all -# available metadata and sequences and produces a text file containing the list -# of strain names that passed the filters. The workflow will collect the union -# of all strain names from the subsampling files and output the corresponding -# subset of metadata and sequences that will be used to build the phylogeny. -# -# As an example, we could define two named subsampling groups like the -# following: -# -# ``` -# subsampling: -# state: --query "division == 'WA'" --subsample-max-sequences 5000 -# neighboring_state: --query "division in ['CA', 'ID', 'OR', 'NV']" --subsample-max-sequences 5000 -# ``` -# -# These named subsampling groups will translate to the following two `augur filter` commands: -# -# ``` -# augur filter \ -# --sequences data/sequences_all.fasta \ -# --metadata data/metadata_all.tsv \ -# --query "division == 'WA'" --subsample-max-sequences 5000 \ -# --output-strains results/subsampled_strains_state.txt -# -# augur filter \ -# --sequences data/sequences_all.fasta \ -# --metadata data/metadata_all.tsv \ -# --query "division in ['CA', 'ID', 'OR', 'NV']" --subsample-max-sequences 5000 \ -# --output-strains results/subsampled_strains_neighboring_state.txt -# ``` -# -# Then, the workflow will collect the strains from each command to extract the -# corresponding metadata and sequences with the following command: -# -# ``` -# augur filter \ -# --sequences data/sequences_all.fasta \ -# --metadata data/metadata_all.tsv \ -# --exclude-all \ -# --include results/subsampled_strains_state.txt results/subsampled_strains_neighboring_state.txt \ -# --output-sequences results/sequences_filtered.fasta \ -# --output-metadata results/metadata_filtered.tsv -# ``` -# -# This command excludes all strains 
by default and then forces the inclusion of -# the strains selected by the subsampling logic defined above. subsampling: - region: --query "is_lab_host != 'true'" --query-columns is_lab_host:str --min-length '8200' --group-by region year --subsample-max-sequences 3000 --exclude defaults/exclude.txt - force_include: --exclude-all --include defaults/include.txt + region: --query "is_lab_host != 'true' & lineage == '2'" --query-columns is_lab_host:str --min-length '8200' --group-by region year --subsample-max-sequences 3000 --exclude defaults/exclude.txt + force_include: --exclude-all --include defaults/lineage-2/include.txt +# Clock rate from McMullen et al, 2013: https://pmc.ncbi.nlm.nih.gov/articles/PMC3709619/ refine: - treetime_params: --coalescent opt --date-inference marginal --date-confidence --keep-polytomies --clock-rate 0.000755 - -traits: - metadata_columns: [ - 'region', - 'country', - 'lineage', - ] + treetime_params: --coalescent opt --date-inference marginal --date-confidence --keep-polytomies --clock-rate 0.000273 export: - description: "defaults/description.md" - auspice_config: "defaults/global/auspice_config.json" + auspice_config: "defaults/lineage-2/auspice_config.json" diff --git a/phylogenetic/defaults/lineage-2/include.txt b/phylogenetic/defaults/lineage-2/include.txt new file mode 100644 index 0000000..e33db5b --- /dev/null +++ b/phylogenetic/defaults/lineage-2/include.txt @@ -0,0 +1,7 @@ +NC_001563 # Lineage 2 reference +MW383507 # Lineage 2 +HM147822 # Lineage 2 +GQ903680 # Lineage 2 +DQ176636 # Lineage 2 +KU978767 # Lineage 2 +HM147823 # Lineage 2 diff --git a/phylogenetic/defaults/lineage-2/reference.gb b/phylogenetic/defaults/lineage-2/reference.gb new file mode 100644 index 0000000..1dca354 --- /dev/null +++ b/phylogenetic/defaults/lineage-2/reference.gb @@ -0,0 +1,270 @@ +LOCUS NC_001563_REF 10962 bp ss-RNA linear VRL 01-AUG-2019 +DEFINITION West Nile virus lineage 2, complete genome. 
+ACCESSION NC_001563_REF +VERSION NC_001563.2 +DBLINK BioProject: PRJNA485481 +KEYWORDS RefSeq. +SOURCE West Nile virus (WNV) + ORGANISM West Nile virus + Viruses; Riboviria; Orthornavirae; Kitrinoviricota; Flasuviricetes; + Amarillovirales; Flaviviridae; Orthoflavivirus; Orthoflavivirus + nilense. +REFERENCE 1 (sites) + AUTHORS Melian,E.B., Hinzman,E., Nagasaki,T., Firth,A.E., Wills,N.M., + Nouwens,A.S., Blitvich,B.J., Leung,J., Funk,A., Atkins,J.F., + Hall,R. and Khromykh,A.A. + TITLE NS1' of flaviviruses in the Japanese encephalitis virus serogroup + is a product of ribosomal frameshifting and plays a role in viral + neuroinvasiveness + JOURNAL J. Virol. 84 (3), 1641-1647 (2010) + PUBMED 19906906 +REFERENCE 2 (bases 1 to 10962) + AUTHORS Yamshchikov,V.F., Wengler,G., Perelygin,A.A., Brinton,M.A. and + Compans,R.W. + TITLE An infectious clone of the West Nile flavivirus + JOURNAL Virology 281 (2), 294-304 (2001) + PUBMED 11277701 +REFERENCE 3 (bases 1 to 10962) + CONSRTM NCBI Genome Project + TITLE Direct Submission + JOURNAL Submitted (09-SEP-2004) National Center for Biotechnology + Information, NIH, Bethesda, MD 20894, USA +REFERENCE 4 (bases 1 to 10962) + AUTHORS Yamshchikov,V.F. + TITLE Direct Submission + JOURNAL Submitted (01-DEC-2000) University of Virginia Health Sciences + Centre, Department of Internal Medicine/GI, Charlottesville, VA + 22906 +REFERENCE 5 (bases 1 to 10962) + AUTHORS Castle,E. + TITLE Direct Submission + JOURNAL Submitted (03-AUG-1993) Justus-Liebig-Universitat Giessen, Institut + fur Virologie, 35392, Giessen, Germany +COMMENT REVIEWED REFSEQ: This record has been curated by NCBI staff. The + reference sequence was derived from M12294. + + On Dec 3, 2000 this sequence version replaced NC_001563.1. + The West Nile viral genome consists of a 42S viral RNA. The + amino-terminal ends of the structural proteins were experimentally + determined and annotated in the authors' submission AAA48498 - see + corresponding comment there. 
The (putative) mature peptides that + were missing from or conflicting with the authors' annotation have + been added to this RefSeq record by the NCBI staff with the kind + help of Dr. Vladimir Yamshchikov (Southern Research Institute, + Birmingham, AL USA). + COMPLETENESS: full length. +FEATURES Location/Qualifiers + source 1..10962 + /organism="West Nile virus" + /mol_type="genomic RNA" + /strain="956" + /db_xref="taxon:11082" + /clone="33/G8; 34/F6" + /note="lineage 2" + CDS 97..411 + /gene="capsid" + CDS 466..966 + /gene="prM" + CDS 967..2457 + /gene="env" + CDS 2458..3513 + /gene="NS1" + CDS 3514..4206 + /gene="NS2A" + CDS 4207..4599 + /gene="NS2B" + CDS 4600..6456 + /gene="NS3" + CDS 6457..6834 + /gene="NS4A" + CDS 6835..6903 + /gene="2K" + CDS 6904..7671 + /gene="NS4B" + CDS 7672..10386 + /gene="NS5" +ORIGIN + 1 agtagttcgc ctgtgtgagc tgacaaactt agtagtgttt gtgaggatta acaacaatta + 61 acacagtgcg agctgtttct tggcacgaag atctcgatgt ctaagaaacc aggagggccc + 121 ggtaaaaacc gggctgtcaa tatgctaaaa cgcggtatgc cccgcggatt gtccttgata + 181 ggactaaaga gggctatgct gagtctgatt gacgggaagg gcccaatacg tttcgtgttg + 241 gctcttttgg cgtttttcag attcactgca atcgctccga ctcgtgcggt gctggacaga + 301 tggagaggcg tcaacaaaca aacagcaatg aagcatctct tgagtttcaa gaaagaacta + 361 ggaactctga ccagtgccat caaccgccgg agcacaaaac aaaagaaaag aggaggcaca + 421 gcgggcttta ctatcttgct tgggctgatc gcctgtgctg gagctgtgac cctctcgaac + 481 ttccagggca aagtgatgat gacagtcaat gcaaccgatg tcactgacgt gattaccatt + 541 ccaacagctg ctgggaaaaa cctgtgcatc gtaagggcta tggacgtagg atacctttgt + 601 gaggatacta tcacttatga atgtccggtc ctagctgctg gaaatgaccc tgaagacatt + 661 gactgctggt gcacgaaatc atctgtttac gtgcgctatg gaagatgcac aaaaactcgg + 721 cattcccgtc gaagcagaag gtctctgaca gtccagacac atggagaaag tacactggcc + 781 aacaagaaag gagcttggtt ggacagcaca aaagccacga gatatctggt gaagacagaa + 841 tcatggatac tgagaaaccc gggctacgcc ctcgttgcag ctgtcattgg atggatgcta + 901 ggaagcaaca caatgcaacg cgtcgtgttt gccattctat tgctcctggt ggcaccagca + 961 tacagcttca 
actgtttagg aatgagtaac agagacttcc tggagggagt gtctggagct + 1021 acatgggttg atctggtact ggaaggcgat agttgtgtga ccataatgtc aaaagacaag + 1081 ccaaccattg atgtcaaaat gatgaacatg gaagcagcca acctcgcaga tgtgcgcagt + 1141 tactgttacc tagcttcggt cagtgacttg tcaacaagag ctgcgtgtcc aaccatgggt + 1201 gaagcccaca acgagaaaag agctgacccc gccttcgttt gcaagcaagg cgttgtggac + 1261 agaggatggg gaaatggctg cggactgttt ggaaagggga gcattgacac atgtgcgaag + 1321 tttgcctgta caaccaaagc aactggatgg atcatccaga aggaaaacat caagtatgag + 1381 gttgccatat ttgtgcatgg cccgacgacc gttgaatctc atggcaagat aggggccacc + 1441 caggctggaa gattcagtat aactccatcg gcgccatctt acacgctaaa gttgggtgag + 1501 tatggtgagg ttacggttga ttgtgagcca cggtcaggaa tagacaccag cgcctattac + 1561 gttatgtcag ttggtgagaa gtccttcctg gttcaccgag aatggtttat ggatctgaac + 1621 ctgccatgga gcagtgctgg aagcaccacg tggaggaacc gggaaacact gatggagttt + 1681 gaagaacctc atgccaccaa acaatctgtt gtggctctag ggtcgcagga aggtgcgttg + 1741 caccaagctc tggccggagc gattcctgtt gagttctcaa gcaacactgt gaagttgaca + 1801 tcaggacatc tgaagtgtcg ggtgaagatg gagaagttgc agctgaaggg aacaacatat + 1861 ggagtatgtt caaaagcgtt caaattcgct aggactcccg ctgacactgg ccacggaacg + 1921 gtggtgttgg aactgcaata taccggaaca gacggtccct gcaaagtgcc catttcttcc + 1981 gtagcttccc tgaatgacct cacacctgtt ggaagactgg tgaccgtgaa tccatttgtg + 2041 tctgtggcca cagccaactc gaaggttttg attgaactcg aacccccgtt tggtgactct + 2101 tacatcgtgg tgggaagagg agaacagcag ataaaccatc actggcacaa atctgggagc + 2161 agcattggaa aggcctttac caccacactc agaggagctc aacgactcgc agctcttgga + 2221 gatactgctt gggattttgg atcagttgga ggggttttca cctcagtggg gaaagccata + 2281 caccaagtct ttggaggagc ttttagatca ctctttggag ggatgtcctg gatcacacag + 2341 ggacttctgg gagctcttct gttgtggatg ggaatcaatg cccgtgacag gtcaattgct + 2401 atgacgtttc ttgcggttgg aggagttttg ctcttccttt cggtcaacgt ccatgctgac + 2461 acaggctgtg ccattgatat tggcaggcaa gagctccggt gcggaagtgg agtgtttatc + 2521 cacaacgatg tggaagcctg gatggatcgt tacaagttct acccggagac gccacagggc + 2581 ctagcaaaaa ttatccagaa agcacatgca 
gaaggagtct gcggcttgcg ttccgtttcc + 2641 agactcgagc accaaatgtg ggaagccatt aaggatgagc tgaacaccct gttgaaagag + 2701 aatggagtcg acttgagtgt cgtggtggaa aaacagaatg ggatgtacaa agcagcacca + 2761 aaacgtttgg ctgccaccac cgaaaaactg gagatgggtt ggaaggcttg gggcaagagt + 2821 atcatctttg cgccagaact agctaacaac acctttgtca tcgacggtcc tgagactgag + 2881 gaatgcccaa cggccaaccg agcatggaac agtatggagg tagaggactt tggatttgga + 2941 ctgacaagca ctcgcatgtt cctgaggatt cgggaaacga acacaacgga atgcgactcg + 3001 aagatcatag gaaccgccgt caagaacaac atggctgtgc atagtgatct atcatactgg + 3061 atagagagcg gactcaacga cacctggaag cttgagaggg cggttctagg agaagtcaaa + 3121 tcatgcacct ggccagaaac ccacactctg tggggtgatg gagttctgga aagtgatctc + 3181 atcataccca tcaccttggc aggacccaga agcaaccaca acaggagacc agggtacaaa + 3241 actcagaacc aaggcccatg ggatgagggg cgcgtcgaga ttgactttga ctattgccca + 3301 ggaacaacag taactataag tgacagttgc gaacaccgtg gacctgcggc acgcacaacc + 3361 actgagagtg ggaagctcat cacagactgg tgctgcagaa gttgcaccct ccctccactg + 3421 cgcttccaga ctgagaatgg ctgttggtat ggaatggaaa ttcgacctac gcggcacgac + 3481 gaaaagaccc tcgtgcaatc gagagtgaat gcatacaacg ccgacatgat tgatcctttt + 3541 cagttgggcc ttatggtcgt gttcttggcc acccaggagg tccttcgcaa gaggtggacg + 3601 gccaagatca gcattccagc tatcatgctt gcactcctag tcctagtgtt tgggggtatt + 3661 acgtacactg atgtcctgcg atatgtcatt ctcgtcggcg ccgcgtttgc tgaagcaaac + 3721 tcaggaggag acgtcgtgca cttggcactt atggctacat tcaagattca accagtcttt + 3781 ctggtggctt cctttttgaa ggcaaggtgg accaaccaag agagtatttt gctcatgctt + 3841 gcagctgctt tcttccaaat ggcttactat gacgccaaga atgttctgtc atgggaagtg + 3901 cctgacgttt tgaactctct ctccgttgcg tggatgattc tcagagctat aagcttcacc + 3961 aacacttcaa atgtggtggt gccgctgctg gcccttttga cacctggatt gaaatgctta + 4021 aaccttgatg tgtacagaat tttgctactc atggttggag ttggaagcct catcaaagaa + 4081 aaaaggagct ctgcagcaaa aaagaaagga gcttgcctca tctgcctagc gctggcgtct + 4141 acaggagtgt tcaatccaat gatacttgca gctgggctaa tggcttgcga ccccaaccgc + 4201 aagcggggct ggcctgctac agaagtgatg actgcagttg gactcatgtt 
tgccatcgtt + 4261 gggggtctgg cagaacttga catagattct atggctatcc ccatgaccat cgccggactt + 4321 atgttcgcgg catttgtcat ctctggaaag tcaacagaca tgtggattga gaggacggct + 4381 gacattactt gggagagtga tgctgaaatc acaggctcta gcgaaagagt agatgtgagg + 4441 ctggatgatg atggaaattt tcaactgatg aatgaccccg gggcaccatg gaaaatttgg + 4501 atgcttagga tggcctgcct ggcgataagt gcctacacac cttgggcaat tctcccctcg + 4561 gtcatcggat tctggataac ccttcagtac acaaagagag gaggtgttct ttgggacaca + 4621 ccatcaccca aggagtacaa gaagggtgat accaccactg gcgtttacag aatcatgact + 4681 cgaggtctgc ttggcagtta ccaagctgga gccggagtga tggtagaggg ggtgttccac + 4741 acactatggc acaccactaa gggagctgct ctcatgagtg gtgagggacg tctggatccc + 4801 tactggggga gcgtgaaaga ggaccgactt tgctatgggg ggccatggaa actccaacat + 4861 aaatggaatg gacatgatga ggtccaaatg attgtcgtgg agccagggaa aaatgtgaaa + 4921 aacgtccaga ccaagcccgg agtgtttaag acaccagaag gagaaattgg ggcagttacg + 4981 ctagactatc ctaccggaac gtcaggttcc cccattgtag acaaaaatgg agatgtgatt + 5041 ggattgtatg ggaacggcgt catcatgcct aatggttcat acataagcgc cattgtgcaa + 5101 ggagagagaa tggaagaacc ggcaccagct ggcttcgaac ctgaaatgtt gaggaagaaa + 5161 cagatcactg tccttgatct gcaccccgga gcaggaaaga cacgcaagat acttccccaa + 5221 atcatcaagg aggccatcaa caaaagattg aggacggctg tgctggcacc caccagggtc + 5281 gttgctgctg agatgtctga ggccctgaga ggacttccca ttcggtacca aacctcagca + 5341 gtgcacagag agcacagtgg aaatgagatc gttgatgtca tgtgccatgc caccctcaca + 5401 cacaggctga tgtctccaca cagagtcccc aactacaacc tgttcataat ggatgaagcc + 5461 catttcacgg atccagcgag catcgcagcc agaggataca tagcaaccaa ggttgaattg + 5521 ggcgaagccg ccgcgatttt catgacggca acgccacccg ggacttctga cccctttcca + 5581 gagtctaatg ctcctatctc ggacatgcaa acagagatcc cagacagagc ctggaacact + 5641 ggatatgaat ggataactga gtatgttgga aagaccgttt ggtttgttcc aagtgtgaaa + 5701 atgggaaatg agattgccct ctgtctgcaa cgggcgggga agaaggttat ccagctgaac + 5761 agaaagtcct atgagacaga gtaccccaag tgtaagaacg atgattggga ttttgtcatc + 5821 accacagaca tatcagaaat gggagccaac ttcaaggcga gcagagtgat cgacagccgc + 5881 aaaagcgtga 
aacccaccat cattgaggaa ggtgatggaa gagtcatcct gggggaaccc + 5941 tcagccatca cggctgccag cgctgctcag cggagaggac gcataggaag aaacccatca + 6001 caagttggtg atgagtattg ctatggaggg cacacaaatg aggatgattc caactttgct + 6061 cactggacag aggctcgcat catgctagac aacatcaaca tgccgaatgg tctggtggct + 6121 caactatatc agcctgagcg cgagaaggtg tacaccatgg acggggaata caggctcaga + 6181 ggggaagaac ggaagaactt ccttgaattc ctgagaacag ctgatttacc agtctggctc + 6241 gcttacaaag tggcagcagc aggaatatca taccatgacc ggaaatggtg ctttgatgga + 6301 cctcgaacca acacgattct tgaagacaac aatgaagttg aagtcatcac gaagttgggt + 6361 gagagaaaga tcctaagacc caggtgggca gatgctagag tgtactcaga ccatcaagct + 6421 ctaaagtcct tcaaagattt tgcatcgggg aaacgatcac aaatcgggct cgttgaggtg + 6481 ctcgggagaa tgcctgaaca cttcatggtg aaaacttggg aggcattgga cacgatgtat + 6541 gtggtggcga ccgctgaaaa aggaggccga gctcacagga tggctcttga ggagctaccg + 6601 gacgcccttc agacaatagt tttgattgca ctattgagtg tgatgtcctt aggtgtgttt + 6661 tttctactca tgcaaaggaa gggcattggt aagattggct tgggaggagt aatcttagga + 6721 gctgccacat tcttctgctg gatggctgaa gtcccaggaa cgaaaatagc aggcatgctc + 6781 ctgctttccc tgctgctcat gattgttttg attccggagc cggaaaagca gcgctcacag + 6841 actgataacc agctcgccgt gttcttgatc tgtgtgctca cactggtcgg cgccgtggct + 6901 gccaatgaaa tgggctggct ggacaagacc aagaatgaca ttggcagcct gttggggcac + 6961 aggccagaag ctagagagac gaccctggga gttgagagct tcttacttga tctgcggccg + 7021 gccacggcat ggtcgctcta tgccgtaacg acagccgttc tcaccccttt gctgaagcat + 7081 ctaatcacgt cagactacat caacacttcg ttgacctcaa taaacgtcca agccagcgcg + 7141 ttgttcactt tggccagagg cttccctttt gtggacgttg gtgtgtcagc tctcttgctg + 7201 gcggtcgggt gctggggtca ggtgactctg actgtgactg tgactgcagc tgctctgctc + 7261 ttttgccact atgcttacat ggtgccaggc tggcaagcgg aagccatgcg atctgcccag + 7321 cggcggacag ctgctggcat catgaaaaat gtagtggtgg atgggatcgt ggccactgat + 7381 gtacctgaac ttgaacgaac aactccagtc atgcagaaaa aagttggaca gatcatattg + 7441 atcttggtat caatggccgc ggtggtcgtc aatccatcag tgagaaccgt cagagaggcc + 7501 ggaattctga ctacagcagc agcagtcacc 
ctatgggaga atggtgctag ttcagtgtgg + 7561 aatgcaacga cagctattgg cctttgtcac atcatgcgag gaggatggct ctcgtgtctc + 7621 tccatcatgt ggactctcat caaaaacatg gagaaaccag gcctcaagag gggtggagcc + 7681 aaaggacgca cgctagggga agtttggaag gagagactca accacatgac gaaggaagaa + 7741 tttaccagat acagaaaaga agccatcact gaagttgacc gctccgcagc aaaacatgct + 7801 aggagagagg gaaacatcac tggaggccac ccagtctcac ggggaaccgc gaaattacgg + 7861 tggttagtgg aaaggcgttt cctcgagcca gtgggaaagg ttgtggatct cgggtgtggt + 7921 agaggcggct ggtgctatta catggctacc cagaagaggg tacaggaagt gaaagggtac + 7981 acgaaaggag gacctggcca tgaagaacca caactggtgc agagctatgg ttggaatatt + 8041 gttaccatga agagtggagt cgacgtcttc tacagaccat cagaagcgag cgacacactg + 8101 ctctgtgaca ttggagagtc atcgtcaagt gccgaggtag aagaacaccg caccgtccgt + 8161 gtcctggaga tggtggaaga ttggttgcac agaggaccga aggaattctg catcaaagtg + 8221 ctatgccctt acatgcccaa agtgattgag aagatggaaa cactccaaag gcgatatgga + 8281 ggtggcctta taagaaaccc cctttcacgc aactctaccc atgagatgta ctgggtgagc + 8341 cacgcttcag gcaatatcgt ccactccgtc aacatgacaa gccaggtgct tctggggagg + 8401 atggaaaaga aaacatggaa gggaccccag tttgaggaag atgtcaactt gggaagtgga + 8461 acgcgggcag tagggaagcc tctcctcaat tctgatacta gcaagatcaa gaaccgaatt + 8521 gagaggctga agaaagaata cagctccaca tggcaccagg atgcgaacca cccctacagg + 8581 acctggaact accacggaag ctatgaagtg aaaccaaccg gctcagccag ctcccttgtg + 8641 aatggggtag tcagattact ctcaaaacca tgggacacta tcaccaatgt gaccacgatg + 8701 gccatgacag acaccactcc tttcggtcaa caacgagtgt tcaaggaaaa ggtggacaca + 8761 aaggctccag agcctccaga aggagtcaaa tacgtcctca atgagaccac gaactggctg + 8821 tgggcttttt tagcccgcga taagaaaccc aggatgtgtt cccgggagga atttattgga + 8881 aaagtcaaca gtaatgccgc cctaggagcg atgtttgaag aacagaacca atggaagaac + 8941 gcccgggaag ctgtagagga tccaaagttt tgggagatgg tggatgagga gcgtgaagcg + 9001 catctccgtg gagaatgcaa cacctgcatc tacaacatga tgggaaagag agagaagaag + 9061 cctggagagt tcggcaaagc taaaggcagc agagccatct ggttcatgtg gctgggggcc + 9121 cgcttcctgg agtttgaagc tctcggattc ctcaatgaag accactggct 
gggtaggaag + 9181 aactcaggag gaggagttga aggcttagga ctgcagaagc tcgggtacat cttgaaggaa + 9241 gttggaacaa agcctggagg aaaggtttac gctgatgata ccgcaggctg ggacacacgc + 9301 atcaccaaag ctgacctcga gaatgaagcg aaggttcttg aactgctgga tggagaacat + 9361 cgacgtttag cgcggtccat catcgagctc acataccgac acaaagtcgt gaaagtgatg + 9421 aggccagcgg ccgacgggaa aactgtgatg gacgtcatct ctagagagga tcagagagga + 9481 agcggtcagg tagtgactta cgccctgaac accttcacca atctagcagt tcagctggtc + 9541 agaatgatgg agggggaggg ggtcattgga cccgatgatg ttgaaaaact gggaaaagga + 9601 aaaggcccta aggtcagaac ctggctgttt gagaatggcg aggagcgtct cagtcgcatg + 9661 gccgtcagcg gtgatgactg cgtggtgaaa cctttggacg accgcttcgc cacatcacta + 9721 cacttcctaa atgctatgtc aaaggtccgc aaagacatcc aggaatggaa accctcgacg + 9781 gggtggtatg actggcagca ggttccattc tgttcaaacc atttcacgga actgatcatg + 9841 aaggacggca ggacgctggt ggtcccgtgt cgtggacaag acgagttgat tggacgtgcc + 9901 aggatctctc caggggctgg atggaatgtg cgcgacaccg cctgcctggc gaagtcatac + 9961 gcgcagatgt ggctgctgct ttatttccac cgtagagacc tgagattgat ggccaatgcc + 10021 atctgttccg ctgtgcctgc caactgggtt cccacagggc gtaccacttg gtcgatccac + 10081 gcaaaaggag aatggatgac gacggaagac atgctcgcag tctggaacag agtgtggatt + 10141 gaggagaatg agtggatgga agacaaaaca ccagttgaga ggtggagtga tgttccatac + 10201 tctggaaaga gagaggacat ttggtgtggc agtttgatcg gcacacgaac ccgcgccact + 10261 tgggctgaaa atatccatgt ggcaatcaat caggtccgtt cagtgattgg agaagagaag + 10321 tatgtggatt acatgagctc cttgaggagg tatgaagaca ccattgtagt ggaggacact + 10381 gttttgtaaa agatagtatt atagttagtt tagtgtaaat aggatttatt gagaatggaa + 10441 gtcaggccag attaatgctg ccaccggaag ttgagtagac ggtgctgcct gcggctcaac + 10501 cccaggagga ctgggtgacc aaagctgcga ggtgatccac gtaagccctc agaaccgtct + 10561 cggaaggagg accccacgtg ctttagcctc aaagcccagt gtcagaccac actttaatgt + 10621 gccactctgc ggagagtgca gtctgcgata gtgccccagg tggactgggt taacaaaggc + 10681 aaaacatcgc cccacgcggc cataaccctg gctatggtgt taaccaggga gaagggacta + 10741 gaggttagag gagaccccgc gtaaaaaagt gcacggccca acttggctga agctgtaagc + 
10801 caagggaagg actagaggtt agaggagacc ccgtgccaaa aacaccaaaa gaaacagcat + 10861 attgacacct gggatagact aggggatctt ctgctctgca caaccagcca cacggcacag + 10921 tgcgccgaca taggtggctg gtggtgctag aacacaggat ct +// From 0ef4abf3f75ba305de41cfed9ca02a787af45101 Mon Sep 17 00:00:00 2001 From: Jennifer Chang Date: Tue, 21 Jan 2025 13:53:17 -0800 Subject: [PATCH 3/5] Automation: Build lineage-1A and lineage-2 phylogenetic trees Added two GitHub Actions in order to support building both lineage-1A and lineage-2 phylogenetic trees. For the most part these take in the config values from the global (possibly renamed to all-lineages) config file but use different references, different clock rates, different include strains, and different build titles. --- .github/workflows/phylogenetic.yaml | 61 +++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/.github/workflows/phylogenetic.yaml b/.github/workflows/phylogenetic.yaml index 420b1cf..6b7c0e2 100644 --- a/.github/workflows/phylogenetic.yaml +++ b/.github/workflows/phylogenetic.yaml @@ -106,6 +106,67 @@ jobs: phylogenetic/logs/ phylogenetic/.snakemake/log/ + + phylogenetic_lineage_1A: + needs: [set_config_overrides] + permissions: + id-token: write + uses: nextstrain/.github/.github/workflows/pathogen-repo-build.yaml@master + secrets: inherit + with: + # Starting with the default docker runtime + # We can migrate to AWS Batch when/if we need to for more resources or if + # the job runs longer than the GH Action limit of 6 hours. 
+ runtime: docker + env: | + NEXTSTRAIN_DOCKER_IMAGE: ${{ inputs.image }} + CONFIG_OVERRIDES: ${{ needs.set_config_overrides.outputs.config_overrides }} + run: | + nextstrain build \ + phylogenetic \ + deploy_all \ + --configfile build-configs/nextstrain-automation/config.yaml defaults/lineage-1A/config.yaml \ + $CONFIG_OVERRIDES + # Specifying artifact name to differentiate ingest build outputs from + # the phylogenetic build outputs + artifact-name: phylogenetic-1A-build-output + artifact-paths: | + phylogenetic/auspice/ + phylogenetic/results/ + phylogenetic/benchmarks/ + phylogenetic/logs/ + phylogenetic/.snakemake/log/ + + phylogenetic_lineage_2: + needs: [set_config_overrides] + permissions: + id-token: write + uses: nextstrain/.github/.github/workflows/pathogen-repo-build.yaml@master + secrets: inherit + with: + # Starting with the default docker runtime + # We can migrate to AWS Batch when/if we need to for more resources or if + # the job runs longer than the GH Action limit of 6 hours. 
+ runtime: docker + env: | + NEXTSTRAIN_DOCKER_IMAGE: ${{ inputs.image }} + CONFIG_OVERRIDES: ${{ needs.set_config_overrides.outputs.config_overrides }} + run: | + nextstrain build \ + phylogenetic \ + deploy_all \ + --configfile build-configs/nextstrain-automation/config.yaml defaults/lineage-2/config.yaml \ + $CONFIG_OVERRIDES + # Specifying artifact name to differentiate ingest build outputs from + # the phylogenetic build outputs + artifact-name: phylogenetic-2-build-output + artifact-paths: | + phylogenetic/auspice/ + phylogenetic/results/ + phylogenetic/benchmarks/ + phylogenetic/logs/ + phylogenetic/.snakemake/log/ + phylogenetic_wa: needs: [set_config_overrides] permissions: From f042bf8a5d604ac999bde578414c61a2ea4cf539 Mon Sep 17 00:00:00 2001 From: Jennifer Chang Date: Tue, 11 Feb 2025 13:43:16 -0800 Subject: [PATCH 4/5] Lineage 1A: Add 1B outgroup for rooting --- phylogenetic/defaults/lineage-1A/config.yaml | 4 ++-- phylogenetic/defaults/lineage-1A/include.txt | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/phylogenetic/defaults/lineage-1A/config.yaml b/phylogenetic/defaults/lineage-1A/config.yaml index 429d004..391b862 100644 --- a/phylogenetic/defaults/lineage-1A/config.yaml +++ b/phylogenetic/defaults/lineage-1A/config.yaml @@ -1,5 +1,5 @@ reference: "defaults/lineage-1A/reference.gb" -root: "best" +root: "KX394399" builds: ['lineage-1A'] subsampling: @@ -8,7 +8,7 @@ subsampling: # Clock rate from Table 1 of May et al, 2010: https://pmc.ncbi.nlm.nih.gov/articles/PMC3067944/ refine: - treetime_params: --coalescent opt --date-inference marginal --date-confidence --keep-polytomies --clock-rate 0.00106 + treetime_params: --coalescent opt --date-inference marginal --date-confidence --keep-polytomies --clock-rate 0.00106 --remove-outgroup export: auspice_config: "defaults/lineage-1A/auspice_config.json" diff --git a/phylogenetic/defaults/lineage-1A/include.txt b/phylogenetic/defaults/lineage-1A/include.txt index a674b7f..2bc4e0b 100644 
--- a/phylogenetic/defaults/lineage-1A/include.txt +++ b/phylogenetic/defaults/lineage-1A/include.txt @@ -1,3 +1,4 @@ +KX394399 # Lineage 1B outgroup NC_009942 # Lineage 1 reference AF481864 # pre-NY MH166901 # NY99 From ad445727a74c1346e5c6eed93c21e56673abc049 Mon Sep 17 00:00:00 2001 From: Jennifer Chang Date: Tue, 21 Jan 2025 15:38:23 -0800 Subject: [PATCH 5/5] WIP: Describe the different views --- phylogenetic/defaults/description.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/phylogenetic/defaults/description.md b/phylogenetic/defaults/description.md index 38354ea..734b3aa 100644 --- a/phylogenetic/defaults/description.md +++ b/phylogenetic/defaults/description.md @@ -2,6 +2,18 @@ We gratefully acknowledge the authors, originating and submitting laboratories o Special thanks to individuals at the [Northwest Pathogen Genomics Center of Excellence](https://github.com/NW-PaGe) and [Grubaugh lab](https://grubaughlab.com/) for comments, code and suggestions. +We maintain four views of WNV evolution: + +The first is [`wnv/global`](https://next.nextstrain.org/staging/WNV/global), which focuses on broader viral diversity for all WNV sequences submitted to GenBank which contain at least 75% of the genome length. + +The second is [`wnv/wa`](https://next.nextstrain.org/staging/WNV/wa), which focuses on Washington State WNV sequences with tiered subsampling from Washington, the surrounding states, and across the USA. + +The third is [`wnv/lineage-1A`](https://next.nextstrain.org/staging/WNV/lineage-1A), which focuses on lineage 1A and uses X as the reference. + +The fourth is [`wnv/lineage-2`](https://next.nextstrain.org/staging/WNV/lineage-2), which focuses on lineage 2 and uses X as the reference. + +#### Underlying data + We curate sequence data and metadata from NCBI as starting point for our analyses.
For global lineage designations, we query [pathoplexus](https://pathoplexus.org/) for lineage assignments and exclusively work with NCBI-sourced records at this time. Curated sequences and metadata are available as flat files at: * [data.nextstrain.org/files/workflows/WNV/sequences.fasta.zst](https://data.nextstrain.org/files/workflows/WNV/sequences.fasta.zst)