-
Notifications
You must be signed in to change notification settings - Fork 9
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Requires custom data via fauna to be present in the `ingest/fauna/data` directory. Reference genomes are from H7N9 and should be improved.

Ingest:

```
cd ingest
snakemake --cores 1 -pf all
```

Phylo:

```
snakemake --cores 4 -pf -s gisaid/Snakefile --configfile config/h7n6.yaml
```
- Loading branch information
1 parent
557bf38
commit 4525438
Showing
15 changed files
with
1,043 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
inputs: | ||
- name: gisaid | ||
metadata: ingest/fauna/results/metadata.tsv | ||
sequences: ingest/fauna/results/sequences_{segment}.fasta | ||
|
||
builds: | ||
- subtype: h7n6 | ||
segment: | ||
- pb2 | ||
- pb1 | ||
- pa | ||
- ha | ||
- np | ||
- na | ||
- mp | ||
- ns | ||
time: all-time | ||
|
||
subtype_lookup: | ||
h7n6: ['h7n6'] | ||
|
||
|
||
|
||
#### Config files #### | ||
reference: config/{subtype}/reference_{subtype}_{segment}.gb | ||
auspice_config: config/{subtype}/auspice_config_{subtype}.json | ||
colors: config/{subtype}/colors_{subtype}.tsv | ||
lat_longs: config/{subtype}/lat_longs_{subtype}.tsv | ||
include_strains: config/{subtype}/include_strains_{subtype}_{time}.txt | ||
dropped_strains: config/{subtype}/dropped_strains_{subtype}.txt | ||
description: config/description_gisaid.md | ||
|
||
|
||
filter: | ||
target_sequences_per_tree: | ||
"*/*/*": 3000 | ||
|
||
min_length: | ||
"*/pb2/*": 2100 | ||
"*/pb1/*": 2100 | ||
"*/pa/*": 2000 | ||
"*/ha/*": 1600 | ||
"*/np/*": 1400 | ||
"*/na/*": 1270 | ||
"*/mp/*": 900 | ||
"*/ns/*": 800 | ||
|
||
min_date: | ||
"h7n6/*/all-time": 1900 | ||
group_by: | ||
"h7n6/*/*": subtype | ||
exclude_where: | ||
host=laboratoryderived # can't use an empty string... | ||
|
||
ancestral: | ||
inference: joint | ||
root_seq: false | ||
|
||
traits: | ||
columns: | ||
"h7n6/*/*": region |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
{ | ||
"title": "Real-time tracking of influenza A/H7N6 evolution", | ||
"maintainers": [ | ||
{"name": "James Hadfield"}, | ||
{"name": "Nextstrain"} | ||
], | ||
"build_url": "https://github.com/nextstrain/avian-flu", | ||
"data_provenance": [ | ||
{ | ||
"name": "GISAID" | ||
} | ||
], | ||
"colorings": [ | ||
{ | ||
"key": "gt", | ||
"title": "Genotype", | ||
"type": "categorical" | ||
}, | ||
{ | ||
"key": "num_date", | ||
"title": "Date", | ||
"type": "continuous" | ||
}, | ||
{ | ||
"key": "pathogenicity", | ||
"title": "Pathogenicity", | ||
"type": "categorical" | ||
}, | ||
{ | ||
"key": "region", | ||
"title": "Region", | ||
"type": "categorical" | ||
}, | ||
{ | ||
"key": "country", | ||
"title": "Country", | ||
"type": "categorical" | ||
}, | ||
{ | ||
"key": "division", | ||
"title": "Admin Division", | ||
"type": "categorical" | ||
}, | ||
{ | ||
"key": "host", | ||
"title": "Host", | ||
"type": "categorical" | ||
}, | ||
{ | ||
"key": "furin_cleavage_motif", | ||
"title": "Furin Cleavage Motif", | ||
"type": "categorical" | ||
}, | ||
{ | ||
"key": "cleavage_site_sequence", | ||
"title": "Cleavage Site Sequence", | ||
"type": "categorical" | ||
}, | ||
{ | ||
"key": "originating_lab", | ||
"title": "Originating Lab", | ||
"type": "categorical" | ||
}, | ||
{ | ||
"key": "submitting_lab", | ||
"title": "Submitting Lab", | ||
"type": "categorical" | ||
} | ||
], | ||
"geo_resolutions": [ | ||
"region", | ||
"country", | ||
"division" | ||
], | ||
"display_defaults": { | ||
"map_triplicate": true, | ||
"color_by": "pathogenicity", | ||
"geo_resolution": "region" | ||
}, | ||
"filters": [ | ||
"host", | ||
"country", | ||
"division", | ||
"originating_lab", | ||
"submitting_lab" | ||
] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
pathogenicity LPAI #2b8cbe | ||
pathogenicity HPAI #f03b20 |
Empty file.
Empty file.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
LOCUS KJ411975 1710 bp cRNA linear VRL 24-JUN-2014 | ||
DEFINITION Influenza A virus (A/Shanghai/01/2014(H7N9)) segment 4 | ||
hemagglutinin (HA) gene, complete cds. | ||
ACCESSION KJ411975 | ||
VERSION KJ411975.1 | ||
KEYWORDS . | ||
SOURCE Influenza A virus (A/Shanghai/01/2014(H7N9)) | ||
ORGANISM Influenza A virus (A/Shanghai/01/2014(H7N9)) | ||
Viruses; ssRNA viruses; ssRNA negative-strand viruses; | ||
Orthomyxoviridae; Influenzavirus A. | ||
REFERENCE 1 (bases 1 to 1710) | ||
AUTHORS Zhang,W., He,Y., Xu,L., Dai,F., Mei,Z., Qian,L., Xie,D., Shen,Y., | ||
Gu,Y., Zhang,Z., Yuan,Z., Jie,Z. and Hu,Y. | ||
TITLE Full-Genome Analysis of Influenza A(H7N9) Virus from Shanghai, | ||
China, 2014 | ||
JOURNAL Genome Announc 2 (3), e00578-14 (2014) | ||
PUBMED 24948761 | ||
REMARK Publication Status: Online-Only | ||
REFERENCE 2 (bases 1 to 1710) | ||
AUTHORS Zhang,W., Xu,L., Tian,D., Liu,Y. and Hu,Y. | ||
TITLE Direct Submission | ||
JOURNAL Submitted (07-FEB-2014) Pathogen Diagnosis and Biosafety | ||
Department, Shanghai Public Health Clinical Center, Fudan | ||
University, 2901 Caolang Road, Jin Shan District, Shanghai, | ||
Shanghai 201508, P.R. China | ||
COMMENT GenBank Accession Numbers KJ411975-KJ411982 represent sequences | ||
from the 8 segments of Influenza A virus | ||
(A/Shanghai/01/2014(H7N9)). | ||
|
||
##Assembly-Data-START## | ||
Sequencing Technology :: Sanger dideoxy sequencing | ||
##Assembly-Data-END## | ||
FEATURES Location/Qualifiers | ||
source 1..1710 | ||
/organism="Influenza A virus (A/Shanghai/01/2014(H7N9))" | ||
/mol_type="viral cRNA" | ||
/strain="A/Shanghai/01/2014" | ||
/serotype="H7N9" | ||
/isolation_source="sputum" | ||
/host="Homo sapiens" | ||
/db_xref="taxon:1460072" | ||
/segment="4" | ||
/country="China" | ||
/collection_date="03-Jan-2014" | ||
gene 1..1683 | ||
/gene="HA" | ||
CDS 1..1683 | ||
/gene="HA" | ||
/function="receptor binding and fusion protein" | ||
/codon_start=1 | ||
/product="hemagglutinin" | ||
/protein_id="AHK10800.1" | ||
/translation="MNTQILVFALIAIIPTNADKICLGHHAVSNGTKVNTLTERGVEV | ||
VNATETVERTNIPRICSKGKRTVDLGQCGLLGTITGPPQCDQFLEFSADLIIERREGS | ||
DVCYPGKFVNEEALRQILRESGGIDKEAMGFTYSGIRTNGATSACRRSGSSFYAEMKW | ||
LLSNTDNAAFPQMTKSYKNTRKSPALIVWGIHHSVSTAEQTKLYGSGNKLVTVGSSNY | ||
QQSFVPSPGARPQVNGLSGRIDFHWLMLNPNDTVTFSFNGAFIAPDRASFLRGKSMGI | ||
QSGVQVDANCEGDCYHSGGTIISNLPFQNIDSRAVGKCPRYVKQRSLLLATGMKNVPE | ||
IPKGRGLFGAIAGFIENGWEGLIDGWYGFRHQNAQGEGTAADYKSTQSAIDQITGKLN | ||
RIIEKTNQQFELIDNEFNEVEKQIGNVINWTRDSITEVWSYNAELLVAMENQHTIDLA | ||
DSEMDKLYERVKRQLRENAEEDGTGCFEIFHKCDDDCMASIRNNTYDHSKYREEAMQN | ||
RIQIDPVKLSSGYKDVILWFSFGASCFILLAIVMGLVFICVKNGNMRCTICI" | ||
CDS 1..54 | ||
/gene="SigPep" | ||
/product="Signal peptide" | ||
CDS 55..1017 | ||
/gene="HA1" | ||
/product="HA1" | ||
CDS 1018..1683 | ||
/gene="HA2" | ||
/product="HA2" | ||
ORIGIN | ||
1 atgaacactc aaatcctggt attcgctctg attgcgatca ttccaacaaa tgcagacaaa | ||
61 atctgcctcg gacatcatgc cgtgtcaaac ggaaccaaag taaacacatt aactgaaaga | ||
121 ggagtggaag tcgtcaatgc aactgaaaca gtggaacgaa caaacatccc caggatctgc | ||
181 tcaaaaggga aaaggacagt tgacctcggt caatgtggac tcctggggac aatcactgga | ||
241 ccacctcaat gtgaccaatt cctagaattt tcagccgatt taattattga gaggcgagaa | ||
301 ggaagtgatg tctgttatcc tgggaaattc gtgaatgaag aagctctgag gcaaattctc | ||
361 agagaatcag gcggaattga caaggaagca atgggattca catacagtgg aataagaact | ||
421 aatggagcaa ccagtgcatg taggagatca ggatcttcat tctatgcaga aatgaaatgg | ||
481 ctcctgtcaa acacagataa tgctgcattc ccgcagatga ctaagtcata taaaaataca | ||
541 agaaaaagcc cagctctaat agtatgggga atccatcatt ccgtgtcaac tgcagagcaa | ||
601 accaagctat atgggagtgg aaacaaactg gtgacagtcg ggagttctaa ttatcaacaa | ||
661 tcttttgtac cgagtccagg agcgagacca caagttaatg gtctatctgg aagaattgac | ||
721 tttcattggc taatgctaaa tcccaatgat acagtcactt tcagtttcaa tggggctttc | ||
781 atagctccag accgtgcaag cttcctgaga ggaaaatcta tgggaatcca gagtggagta | ||
841 caggttgatg ccaattgtga aggggactgc tatcatagtg gagggacaat aataagtaac | ||
901 ttgccatttc agaacataga tagcagagca gttggaaaat gtccgagata tgttaagcaa | ||
961 aggagtctgc tgctagcaac agggatgaag aatgttcctg agattccaaa gggaagaggc | ||
1021 ctatttggtg ctatagcggg tttcattgaa aatggatggg aaggcctaat tgatggttgg | ||
1081 tatggtttca gacaccagaa tgcacaggga gagggaactg ctgcagatta caaaagcact | ||
1141 caatcggcaa ttgatcaaat aacaggaaaa ttaaaccgga ttatagaaaa aaccaaccaa | ||
1201 caatttgagt tgatagacaa tgaattcaat gaggtagaga agcaaatcgg taatgtgata | ||
1261 aattggacca gagattctat aacagaagtg tggtcataca atgctgaact cttggtagca | ||
1321 atggagaacc agcatacaat tgatctggct gattcagaaa tggacaaact gtacgaacga | ||
1381 gtgaaaagac agctaagaga gaatgctgaa gaagatggca ctggttgctt tgaaatattt | ||
1441 cacaagtgtg atgatgactg tatggccagt attagaaata acacctatga tcacagcaaa | ||
1501 tacagggaag aggcaatgca aaatagaata cagattgacc cagtcaaact aagcagcggc | ||
1561 tacaaagatg tgatactttg gtttagcttc ggggcatcat gtttcatact tctagccatt | ||
1621 gtaatgggcc ttgtcttcat atgtgtaaag aatggaaaca tgcggtgcac tatttgtata | ||
1681 taagtttgga aaaaacaccc ttgtttctac | ||
// | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
LOCUS NC_026427 985 bp cRNA linear VRL 06-FEB-2015 | ||
DEFINITION Influenza A virus (A/Shanghai/02/2013(H7N9)) segment 7 matrix | ||
protein 2 (M2) and matrix protein 1 (M1) genes, complete cds. | ||
ACCESSION NC_026427 | ||
VERSION NC_026427.1 | ||
DBLINK BioProject: PRJNA274585 | ||
KEYWORDS RefSeq. | ||
SOURCE Influenza A virus (A/Shanghai/02/2013(H7N9)) | ||
ORGANISM Influenza A virus (A/Shanghai/02/2013(H7N9)) | ||
Viruses; ssRNA viruses; ssRNA negative-strand viruses; | ||
Orthomyxoviridae; Influenzavirus A. | ||
REFERENCE 1 (bases 1 to 985) | ||
AUTHORS Zhu,H., Wang,D., Kelvin,D.J., Li,L., Zheng,Z., Yoon,S.W., | ||
Wong,S.S., Farooqui,A., Wang,J., Banner,D., Chen,R., Zheng,R., | ||
Zhou,J., Zhang,Y., Hong,W., Dong,W., Cai,Q., Roehrl,M.H., | ||
Huang,S.S., Kelvin,A.A., Yao,T., Zhou,B., Chen,X., Leung,G.M., | ||
Poon,L.L., Webster,R.G., Webby,R.J., Peiris,J.S., Guan,Y. and | ||
Shu,Y. | ||
TITLE Infectivity, transmission, and pathology of human-isolated H7N9 | ||
influenza virus in ferrets and pigs | ||
JOURNAL Science 341 (6142), 183-186 (2013) | ||
PUBMED 23704376 | ||
REMARK Erratum:[Science. 2013 Aug 30;341(6149):959] | ||
REFERENCE 2 (bases 1 to 985) | ||
CONSRTM NCBI Genome Project | ||
TITLE Direct Submission | ||
JOURNAL Submitted (04-FEB-2015) National Center for Biotechnology | ||
Information, NIH, Bethesda, MD 20894, USA | ||
REFERENCE 3 (bases 1 to 985) | ||
AUTHORS Zhu,H., Wang,D., Kelvin,D.J., Li,L., Zheng,Z., Yoon,S.W., | ||
Wong,S.S., Farooqui,A., Wang,J., Banner,D., Chen,R., Zheng,R., | ||
Zhou,J., Zhang,Y., Hong,W., Dong,W., Cai,Q., Roehrl,M.A., | ||
Huang,S.H., Calvin,A.A., Yao,T., Zhou,B., Chen,X., Leung,G.M., | ||
Poon,L.L., Webster,R.G., Webby,R.J., Peiris,J.S., Guan,Y. and | ||
Shu,Y. | ||
TITLE Direct Submission | ||
JOURNAL Submitted (10-MAY-2013) Centre of Influenza Research, School of | ||
Public Health, The University of Hong Kong, 5/F, Laboratory Block, | ||
LKS Faculty of Medicine Building, 21 Sasson Road, Pokfulm, Hong | ||
Kong SAR, China | ||
COMMENT REVIEWED REFSEQ: This record has been curated by NCBI staff. The | ||
reference sequence is identical to KF021600. | ||
GenBank Accession Numbers KF021594-KF021601 represent sequences | ||
from the 8 segments of Influenza A virus | ||
(A/Shanghai/02/2013(H7N9)). | ||
|
||
##Assembly-Data-START## | ||
Assembly Method :: Lasergene v. 8.0 | ||
Sequencing Technology :: 454 | ||
##Assembly-Data-END## | ||
COMPLETENESS: full length. | ||
FEATURES Location/Qualifiers | ||
source 1..985 | ||
/organism="Influenza A virus (A/Shanghai/02/2013(H7N9))" | ||
/mol_type="viral cRNA" | ||
/strain="A/Shanghai/02/2013" | ||
/serotype="H7N9" | ||
/host="Homo sapiens" | ||
/db_xref="taxon:1332244" | ||
/segment="7" | ||
/country="China" | ||
/collection_date="05-Mar-2013" | ||
/note="passage details: E1" | ||
gene 1..982 | ||
/gene="M2" | ||
/db_xref="GeneID:23104232" | ||
CDS join(1..26,715..982) | ||
/gene="M2" | ||
/codon_start=1 | ||
/product="matrix protein 2" | ||
/protein_id="YP_009118477.1" | ||
/db_xref="GeneID:23104232" | ||
/translation="MSLLTEVETPTRTGWECNCSGSSEPLVVAANIIGILHLILWILD | ||
RLFFKCIYRRFKYGLKRGPSTEGMPESMREEYRQEQQNAVDVDDGHFVNIELK" | ||
gene 1..759 | ||
/gene="M1" | ||
/db_xref="GeneID:23104231" | ||
CDS 1..759 | ||
/gene="M1" | ||
/codon_start=1 | ||
/product="matrix protein 1" | ||
/protein_id="YP_009118478.1" | ||
/db_xref="GeneID:23104231" | ||
/translation="MSLLTEVETYVLSIIPSGPLKAEIAQRLEDVFAGKNADLEALME | ||
WIKTRPILSPLTKGILGFVFTLTVPSERGLQRRRFVQNALNGNGDPNNMDKAVKLYKK | ||
LKREMTFHGAKEVALSYSTGALASCMGLIYNRMGTVTAEGALGLVCATCEQIADAQHR | ||
SHRQMATTTNPLIRHENRMVLASTTAKAMEQMAGSSEQAAEAMEVASQARQMVQAMRT | ||
VGTHPNSSTGLKDDLIENLQAYQNRMGVQLQRFK" | ||
ORIGIN | ||
1 atgagtcttc taaccgaggt cgaaacgtac gttctctcta tcattccatc aggccccctc | ||
61 aaagccgaga tcgcacagag acttgaggat gtttttgcag ggaagaacgc agatctcgag | ||
121 gctctcatgg agtggataaa gacaagacca atcctgtcac ctctgactaa ggggatttta | ||
181 gggtttgtgt tcacgctcac cgtgcccagt gagcgaggac tgcagcgtag acggtttgtc | ||
241 caaaacgccc taaatgggaa tggagaccca aacaacatgg acaaggcggt taaattatac | ||
301 aagaaactga agagggaaat gacatttcat ggagcaaagg aagttgcact cagttactca | ||
361 actggtgcgc ttgccagctg catgggtctc atatacaaca gaatggggac tgtgaccgca | ||
421 gaaggggctc ttggactagt atgtgccact tgtgagcaga ttgctgacgc acaacatcgg | ||
481 tcccacaggc agatggcgac tactactaac ccactaatta ggcatgagaa tagaatggta | ||
541 ctagccagca ctacggctaa ggctatggag cagatggctg gatcaagtga acaggcagcg | ||
601 gaagccatgg aagttgcaag tcaggctagg caaatggtgc aggctatgag aacagttggg | ||
661 actcacccta actccagtac aggtctaaaa gatgatctta ttgaaaattt gcaggcctac | ||
721 cagaaccgga tgggagtgca actgcagcgg ttcaagtgag cctctagtcg ttgcagctaa | ||
781 cattattggg atattgcact tgatattgtg gattcttgat cgtcttttct tcaaatgcat | ||
841 ttatcgtcgt tttaaatacg gtttgaaaag agggccttct acggaaggaa tgcctgagtc | ||
901 tatgagggaa gaatatcggc aggaacagca gaatgctgtg gatgttgacg atggtcattt | ||
961 tgtcaacata gagctgaagt aaaaa | ||
// |
Oops, something went wrong.