From 3e0f9feaa4d1799cdda6cf839e03d09390f39c53 Mon Sep 17 00:00:00 2001
From: Trevor Bedford
Date: Sun, 30 Dec 2018 22:21:23 -0800
Subject: [PATCH] Initial commit including snakemake and config

---
 .gitignore                       |  41 +++++
 Snakefile                        | 309 +++++++++++++++++++++++++++++
 config/auspice_config_all.json   |  52 ++++++
 config/auspice_config_denv1.json |  46 +++++
 config/auspice_config_denv2.json |  46 +++++
 config/auspice_config_denv3.json |  46 +++++
 config/auspice_config_denv4.json |  46 +++++
 config/clades.tsv                |  11 ++
 config/dropped_strains.txt       |  23 +++
 config/reference_dengue_all.gb   | 275 ++++++++++++++++++++++++++++
 config/reference_dengue_denv1.gb | 294 ++++++++++++++++++++++++++++++
 config/reference_dengue_denv2.gb | 301 +++++++++++++++++++++++++++++++
 config/reference_dengue_denv3.gb | 283 +++++++++++++++++++++++++++++
 config/reference_dengue_denv4.gb | 275 ++++++++++++++++++++++++++++
 14 files changed, 2048 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 Snakefile
 create mode 100644 config/auspice_config_all.json
 create mode 100644 config/auspice_config_denv1.json
 create mode 100644 config/auspice_config_denv2.json
 create mode 100644 config/auspice_config_denv3.json
 create mode 100644 config/auspice_config_denv4.json
 create mode 100644 config/clades.tsv
 create mode 100644 config/dropped_strains.txt
 create mode 100644 config/reference_dengue_all.gb
 create mode 100644 config/reference_dengue_denv1.gb
 create mode 100644 config/reference_dengue_denv2.gb
 create mode 100644 config/reference_dengue_denv3.gb
 create mode 100644 config/reference_dengue_denv4.gb

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..b653f7d8
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,41 @@
+# Files created by the pipeline, which we want to keep out of git
+# (or at least out of _this_ git repo).
+data/
+results/
+auspice/
+build/
+
+# Sensitive environment variables
+environment*
+
+# Snakemake state dir
+/.snakemake
+
+# Local config overrides
+/config_local.yaml
+
+# For Python #
+##############
+*.pyc
+.tox/
+.cache/
+
+# Compiled source #
+###################
+*.com
+*.class
+*.dll
+*.exe
+*.o
+*.so
+
+# OS generated files #
+######################
+.DS_Store
+.DS_Store?
+._*
+.Spotlight-V100
+.Trashes
+Icon?
+ehthumbs.db
+Thumbs.db
diff --git a/Snakefile b/Snakefile
new file mode 100644
index 00000000..67a06c4b
--- /dev/null
+++ b/Snakefile
@@ -0,0 +1,309 @@
+# Builds to produce: one per serotype plus a combined 'all' build.
+serotypes = ['all', 'denv1', 'denv2', 'denv3', 'denv4']
+
+# Default target: auspice tree and meta JSONs for every build.
+rule all:
+    input:
+        auspice_tree = expand("auspice/dengue_{serotype}_tree.json", serotype=serotypes),
+        auspice_meta = expand("auspice/dengue_{serotype}_meta.json", serotype=serotypes),
+
+# Holds the config file paths; it has no input, output or command of its own.
+rule files:
+    params:
+        dropped_strains = "config/dropped_strains.txt",
+        reference = "config/reference_dengue_{serotype}.gb",
+        clades = "config/clades.tsv",
+        auspice_config = "config/auspice_config_{serotype}.json"
+
+files = rules.files.params
+
+def serotype_integer(w):
+    """Return the serotype number, as a string, for the `serotype` wildcard.
+
+    Only denv1-denv4 are known; any other value (including 'all') raises
+    KeyError.
+    """
+    # Named so that it does not shadow the module-level `serotypes` list.
+    serotype_numbers = {'denv1': '1', 'denv2': '2', 'denv3': '3', 'denv4': '4'}
+    return serotype_numbers[w.serotype]
+
+# data/dengue_all.fasta matches the output pattern of both `download` and
+# `concat`; prefer `concat`, since `serotype_integer` has no entry for 'all'.
+ruleorder: concat > download
+
+# Download the sequences of one serotype with fauna's vdb/download.py.
+rule download:
+    message: "Downloading sequences from fauna"
+    output:
+        sequences = "data/dengue_{serotype}.fasta"
+    params:
+        fasta_fields = "strain virus accession collection_date region country division location source locus authors url title journal puburl",
+        serotype_integer = serotype_integer
+    shell:
+        """
+        python3 ../fauna/vdb/download.py \
+            --database vdb \
+            --virus dengue \
+            --fasta_fields {params.fasta_fields} \
+            --select serotype:{params.serotype_integer} \
+            --path $(dirname {output.sequences}) \
+            --fstem $(basename {output.sequences} .fasta)
+        """
+
+# Build the combined dataset by concatenating the four serotype downloads.
+rule concat:
+    message: "Concatenating serotype sequences"
+    input:
+        denv1_sequences = "data/dengue_denv1.fasta",
+        denv2_sequences = "data/dengue_denv2.fasta",
+        denv3_sequences = "data/dengue_denv3.fasta",
+        denv4_sequences = "data/dengue_denv4.fasta"
+    output:
+        sequences = "data/dengue_all.fasta"
+    shell:
+        """
+        cat {input.denv1_sequences} \
+            {input.denv2_sequences} \
+            {input.denv3_sequences} \
+            {input.denv4_sequences} \
+            > {output.sequences}
+        """
+
+# Split the downloaded FASTA into a sequence file and a metadata table.
+rule parse:
+    message: "Parsing fasta into sequences and metadata"
+    input:
+        sequences = rules.download.output.sequences
+    output:
+        sequences = "results/sequences_{serotype}.fasta",
+        metadata = "results/metadata_{serotype}.tsv"
+    params:
+        fasta_fields = "strain virus accession date region country division city db segment authors url title journal paper_url"
+    shell:
+        """
+        augur parse \
+            --sequences {input.sequences} \
+            --output-sequences {output.sequences} \
+            --output-metadata {output.metadata} \
+            --fields {params.fasta_fields}
+        """
+
+# Subsample per group and drop excluded strains and too-short genomes.
+rule filter:
+    message:
+        """
+        Filtering to
+          - {params.sequences_per_group} sequence(s) per {params.group_by!s}
+          - excluding strains in {input.exclude}
+          - minimum genome length of {params.min_length}
+        """
+    input:
+        sequences = rules.parse.output.sequences,
+        metadata = rules.parse.output.metadata,
+        exclude = files.dropped_strains
+    output:
+        sequences = "results/filtered_{serotype}.fasta"
+    params:
+        group_by = "year",
+        sequences_per_group = 30,
+        min_length = 5000
+    shell:
+        """
+        augur filter \
+            --sequences {input.sequences} \
+            --metadata {input.metadata} \
+            --exclude {input.exclude} \
+            --output {output.sequences} \
+            --group-by {params.group_by} \
+            --sequences-per-group {params.sequences_per_group} \
+            --min-length {params.min_length}
+        """
+
+# Align the filtered sequences to the build's reference genome.
+rule align:
+    message:
+        """
+        Aligning sequences to {input.reference}
+          - filling gaps with N
+        """
+    input:
+        sequences = rules.filter.output.sequences,
+        reference = files.reference
+    output:
+        alignment = "results/aligned_{serotype}.fasta"
+    shell:
+        """
+        augur align \
+            --sequences {input.sequences} \
+            --reference-sequence {input.reference} \
+            --output {output.alignment} \
+            --fill-gaps \
+            --remove-reference
+        """
+
+# Build a raw tree from the alignment.
+rule tree:
+    message: "Building tree"
+    input:
+        alignment = rules.align.output.alignment
+    output:
+        tree = "results/tree-raw_{serotype}.nwk"
+    shell:
+        """
+        augur tree \
+            --alignment {input.alignment} \
+            --output {output.tree}
+        """
+
+# Turn the raw tree into a time tree and write its node data.
+rule refine:
+    message:
+        """
+        Refining tree
+          - estimate timetree
+          - use {params.coalescent} coalescent timescale
+          - estimate {params.date_inference} node dates
+          - filter tips more than {params.clock_filter_iqd} IQDs from clock expectation
+        """
+    input:
+        tree = rules.tree.output.tree,
+        alignment = rules.align.output.alignment,
+        metadata = rules.parse.output.metadata
+    output:
+        tree = "results/tree_{serotype}.nwk",
+        node_data = "results/branch-lengths_{serotype}.json"
+    params:
+        coalescent = "const",
+        date_inference = "marginal",
+        clock_filter_iqd = 4
+    shell:
+        """
+        augur refine \
+            --tree {input.tree} \
+            --alignment {input.alignment} \
+            --metadata {input.metadata} \
+            --output-tree {output.tree} \
+            --output-node-data {output.node_data} \
+            --timetree \
+            --coalescent {params.coalescent} \
+            --date-confidence \
+            --date-inference {params.date_inference} \
+            --clock-filter-iqd {params.clock_filter_iqd}
+        """
+
+# Reconstruct ancestral sequences and mutations on the refined tree.
+rule ancestral:
+    message: "Reconstructing ancestral sequences and mutations"
+    input:
+        tree = rules.refine.output.tree,
+        alignment = rules.align.output.alignment
+    output:
+        node_data = "results/nt-muts_{serotype}.json"
+    params:
+        inference = "joint"
+    shell:
+        """
+        augur ancestral \
+            --tree {input.tree} \
+            --alignment {input.alignment} \
+            --output {output.node_data} \
+            --inference {params.inference}
+        """
+
+# Translate the reconstructed sequences against the build's reference.
+rule translate:
+    message: "Translating amino acid sequences"
+    input:
+        tree = rules.refine.output.tree,
+        node_data = rules.ancestral.output.node_data,
+        reference = files.reference
+    output:
+        node_data = "results/aa-muts_{serotype}.json"
+    shell:
+        """
+        augur translate \
+            --tree {input.tree} \
+            --ancestral-sequences {input.node_data} \
+            --reference-sequence {input.reference} \
+            --output {output.node_data}
+        """
+
+# Infer ancestral states for the metadata columns listed in params.columns.
+rule traits:
+    message:
+        """
+        Inferring ancestral traits for {params.columns!s}
+          - increase uncertainty of reconstruction by {params.sampling_bias_correction} to partially account for sampling bias
+        """
+    input:
+        tree = rules.refine.output.tree,
+        metadata = rules.parse.output.metadata
+    output:
+        node_data = "results/traits_{serotype}.json",
+    params:
+        columns = "region country",
+        sampling_bias_correction = 3
+    shell:
+        """
+        augur traits \
+            --tree {input.tree} \
+            --metadata {input.metadata} \
+            --output {output.node_data} \
+            --columns {params.columns} \
+            --confidence \
+            --sampling-bias-correction {params.sampling_bias_correction}
+        """
+
+# Annotate clades using the definitions in config/clades.tsv.
+rule clades:
+    message: "Annotating clades"
+    input:
+        tree = rules.refine.output.tree,
+        nt_muts = rules.ancestral.output.node_data,
+        aa_muts = rules.translate.output.node_data,
+        clades = files.clades
+    output:
+        clades = "results/clades_{serotype}.json"
+    shell:
+        """
+        augur clades \
+            --tree {input.tree} \
+            --mutations {input.nt_muts} {input.aa_muts} \
+            --clades {input.clades} \
+            --output {output.clades}
+        """
+
+# Combine the tree, metadata and node data into the auspice JSONs.
+rule export:
+    message: "Exporting data files for auspice"
+    input:
+        tree = rules.refine.output.tree,
+        metadata = rules.parse.output.metadata,
+        branch_lengths = rules.refine.output.node_data,
+        traits = rules.traits.output.node_data,
+        clades = rules.clades.output.clades,
+        nt_muts = rules.ancestral.output.node_data,
+        aa_muts = rules.translate.output.node_data,
+        auspice_config = files.auspice_config
+    output:
+        auspice_tree = "auspice/dengue_{serotype}_tree.json",
+        auspice_meta = "auspice/dengue_{serotype}_meta.json"
+    shell:
+        """
+        augur export \
+            --tree {input.tree} \
+            --metadata {input.metadata} \
+            --node-data {input.branch_lengths} {input.traits} {input.clades} {input.nt_muts} {input.aa_muts} \
+            --auspice-config {input.auspice_config} \
+            --output-tree {output.auspice_tree} \
+            --output-meta {output.auspice_meta}
+        """
+
+# Delete the generated results and auspice directories.
+rule clean:
+    message: "Removing directories: {params}"
+    params:
+        "results",
+        "auspice"
+    shell:
+        "rm -rfv {params}"
diff --git a/config/auspice_config_all.json b/config/auspice_config_all.json
new file mode 100644
index 00000000..02551333
--- /dev/null
+++ b/config/auspice_config_all.json
@@ -0,0 +1,52 @@
+{
+  "title": "Real-time tracking of dengue virus evolution",
+  "color_options": {
+    "gt": {
+      "menuItem": "genotype",
+      "legendTitle": "Genotype",
+      "type": "discrete",
+      "key": "genotype"
+    },
+    "num_date": {
+      "menuItem": "date",
+      "legendTitle": "Sampling date",
+      "type": "continuous",
+      "key": "num_date"
+    },
+    "country": {
+      "menuItem": "country",
+      "legendTitle": "Country",
+      "type": "discrete",
+      "key": "country"
+    },
+    "region": {
+      "menuItem": "region",
+      "legendTitle": "Region",
+      "type": "discrete",
+      "key": "region"
+    },
+    "clade_membership": {
+      "key": "clade_membership",
+      "legendTitle": "Serotype",
+      "menuItem": "serotype",
+      "type": "discrete"
+    }
+  },
+  "geo": [
+    "country",
+    "region"
+  ],
+  "defaults": {
+    "mapTriplicate": true,
+    "colorBy": "clade_membership"
+  },
+  "maintainer": [
+    "Trevor Bedford",
+    "http://bedford.io/team/trevor-bedford/"
+  ],
+  "filters": [
+    "country",
+    "region",
+    "authors"
+  ]
+}
diff --git a/config/auspice_config_denv1.json b/config/auspice_config_denv1.json
new file mode 100644
index 00000000..01099e16
--- /dev/null
+++ b/config/auspice_config_denv1.json
@@ -0,0 +1,46 @@
+{
+  "title": "Real-time tracking of DENV1 dengue virus evolution",
+  "color_options": {
+    "gt": {
+      "menuItem": "genotype",
+      "legendTitle": "Genotype",
+      "type": "discrete",
+      "key": "genotype"
+    },
+    "num_date": {
+      "menuItem": "date",
+      "legendTitle": "Sampling date",
+      "type": "continuous",
+      "key": "num_date"
+    },
+    "country": {
+      "menuItem": "country",
+      "legendTitle": "Country",
+      "type": "discrete",
+      "key": "country"
+    },
+    "region": {
+      "menuItem": "region",
+      "legendTitle": "Region",
+      "type": "discrete",
+      "key": "region"
+    }
+  },
+  "geo": [
+    "country",
+    "region"
+  ],
+
"defaults": { + "mapTriplicate": true, + "colorBy": "region" + }, + "maintainer": [ + "Trevor Bedford", + "http://bedford.io/team/trevor-bedford/" + ], + "filters": [ + "country", + "region", + "authors" + ] +} diff --git a/config/auspice_config_denv2.json b/config/auspice_config_denv2.json new file mode 100644 index 00000000..eb7b6bb5 --- /dev/null +++ b/config/auspice_config_denv2.json @@ -0,0 +1,46 @@ +{ + "title": "Real-time tracking of DENV2 dengue virus evolution", + "color_options": { + "gt": { + "menuItem": "genotype", + "legendTitle": "Genotype", + "type": "discrete", + "key": "genotype" + }, + "num_date": { + "menuItem": "date", + "legendTitle": "Sampling date", + "type": "continuous", + "key": "num_date" + }, + "country": { + "menuItem": "country", + "legendTitle": "Country", + "type": "discrete", + "key": "country" + }, + "region": { + "menuItem": "region", + "legendTitle": "Region", + "type": "discrete", + "key": "region" + } + }, + "geo": [ + "country", + "region" + ], + "defaults": { + "mapTriplicate": true, + "colorBy": "region" + }, + "maintainer": [ + "Trevor Bedford", + "http://bedford.io/team/trevor-bedford/" + ], + "filters": [ + "country", + "region", + "authors" + ] +} diff --git a/config/auspice_config_denv3.json b/config/auspice_config_denv3.json new file mode 100644 index 00000000..c648fe05 --- /dev/null +++ b/config/auspice_config_denv3.json @@ -0,0 +1,46 @@ +{ + "title": "Real-time tracking of DENV3 dengue virus evolution", + "color_options": { + "gt": { + "menuItem": "genotype", + "legendTitle": "Genotype", + "type": "discrete", + "key": "genotype" + }, + "num_date": { + "menuItem": "date", + "legendTitle": "Sampling date", + "type": "continuous", + "key": "num_date" + }, + "country": { + "menuItem": "country", + "legendTitle": "Country", + "type": "discrete", + "key": "country" + }, + "region": { + "menuItem": "region", + "legendTitle": "Region", + "type": "discrete", + "key": "region" + } + }, + "geo": [ + "country", + "region" + ], + 
"defaults": { + "mapTriplicate": true, + "colorBy": "region" + }, + "maintainer": [ + "Trevor Bedford", + "http://bedford.io/team/trevor-bedford/" + ], + "filters": [ + "country", + "region", + "authors" + ] +} diff --git a/config/auspice_config_denv4.json b/config/auspice_config_denv4.json new file mode 100644 index 00000000..71468ee7 --- /dev/null +++ b/config/auspice_config_denv4.json @@ -0,0 +1,46 @@ +{ + "title": "Real-time tracking of DENV4 dengue virus evolution", + "color_options": { + "gt": { + "menuItem": "genotype", + "legendTitle": "Genotype", + "type": "discrete", + "key": "genotype" + }, + "num_date": { + "menuItem": "date", + "legendTitle": "Sampling date", + "type": "continuous", + "key": "num_date" + }, + "country": { + "menuItem": "country", + "legendTitle": "Country", + "type": "discrete", + "key": "country" + }, + "region": { + "menuItem": "region", + "legendTitle": "Region", + "type": "discrete", + "key": "region" + } + }, + "geo": [ + "country", + "region" + ], + "defaults": { + "mapTriplicate": true, + "colorBy": "region" + }, + "maintainer": [ + "Trevor Bedford", + "http://bedford.io/team/trevor-bedford/" + ], + "filters": [ + "country", + "region", + "authors" + ] +} diff --git a/config/clades.tsv b/config/clades.tsv new file mode 100644 index 00000000..552c17fc --- /dev/null +++ b/config/clades.tsv @@ -0,0 +1,11 @@ +clade gene site alt +DENV1 C 70 S +DENV1 M 28 S +DENV1 NS2B 13 I +DENV2 C 10 T +DENV2 M 19 K +DENV2 NS2B 26 I +DENV3 C 12 I +DENV3 NS2A 9 T +DENV4 M 11 L +DENV4 NS1 9 S diff --git a/config/dropped_strains.txt b/config/dropped_strains.txt new file mode 100644 index 00000000..34c20f65 --- /dev/null +++ b/config/dropped_strains.txt @@ -0,0 +1,23 @@ +DENV1/FRANCE/00475/2008 +DENV1/VIETNAM/BIDV3990/2008 +DENV1/VIETNAM/BIDV992/2006 +DENV2/AUSTRALIA/QML22/2015 +DENV2/BURKINA_FASO/DAKAR2039/1980 +DENV2/COTE_D_IVOIRE/DAKAR510/1980 +DENV2/COTE_D_IVOIRE/DAKAR578/1980 +DENV2/GUINEA/PM33974/1981 +DENV2/HAITI/DENGUEVIRUS2HOMOSAPIENS1/2016 
+DENV2/MALAYSIA/DKD811/2008 +DENV2/MALAYSIA/P81407/1970 +DENV2/MALAYSIA/SAB/2015 +DENV2/NIGERIA/IBH11208/1966 +DENV2/NIGERIA/IBH11234/1966 +DENV2/NIGERIA/IBH11664/1966 +DENV2/SENEGAL/0674/1970 +DENV2/SENEGAL/DAKAR0761/1974 +DENV2/SENEGAL/DAKAR141069/1999 +DENV2/SENEGAL/DAKAR141070/1999 +DENV2/SENEGAL/DAKARD75505/1999 +DENV2/TRINIDAD_AND_TOBAGO/NA/1953 +DENV4/MALAYSIA/P215/1975 +DENV4/MALAYSIA/P731120/1973 diff --git a/config/reference_dengue_all.gb b/config/reference_dengue_all.gb new file mode 100644 index 00000000..f0bc91f7 --- /dev/null +++ b/config/reference_dengue_all.gb @@ -0,0 +1,275 @@ +LOCUS DENV4/NA/REFERENCE/2003 10649 bp DNA VRL 11-FEB-2016 +DEFINITION Dengue virus 4, complete genome. +ACCESSION NC_002640 +VERSION NC_002640.1 +DBLINK BioProject:PRJNA15599 +KEYWORDS RefSeq. +SOURCE Dengue virus 4 + ORGANISM Dengue virus 4 + Viruses; ssRNA viruses; ssRNA positive-strand viruses, no DNA stage; + Flaviviridae; Flavivirus; Dengue virus group. +REFERENCE 1 (bases 1 to 10649) + AUTHORS Durbin,A.P., Karron,R.A., Sun,W., Vaughn,D.W., Reynolds,M.J., + Perreault,J.R., Men,R.H., Lai,C.J., Elkins,W.R., Chanock,R.M., + Murphy,B.R. and Whitehead,S.S. + TITLE A live attenuated dengue virus type 4 vaccine candidate with a 30 + nucleotide deletion in the 3' untranslated region is highly + attenuated and immunogenic in humans + JOURNAL Unpublished +REFERENCE 2 (bases 1 to 10649) + CONSRTM NCBI Genome Project + TITLE Direct Submission + JOURNAL Submitted (12-JAN-2001) National Center for Biotechnology + Information, NIH, Bethesda, MD 20894, USA +REFERENCE 3 (bases 1 to 10649) + AUTHORS Whitehead,S.S. + TITLE Direct Submission + JOURNAL Submitted (08-DEC-2000) LID, NIAID, 7 Center Drive, Bethesda, MD + 20892, USA +COMMENT PROVISIONAL REFSEQ: This record has not yet been subject to final + NCBI review. The reference sequence was derived from AF326825. + COMPLETENESS: full length. 
+FEATURES Location/Qualifiers + source 1..10649 + /clone="rDEN4" + /db_xref="taxon:11070" + /mol_type="genomic RNA" + /organism="Dengue virus 4" + 5'UTR 1..101 + gene 102..10265 + /db_xref="GeneID:5075729" + /gene="flavivirus polyprotein gene" + CDS 102..440 + /gene="C" + /product="anchored capsid protein C" + /protein_id="NP_740314.1" + CDS 441..938 + /gene="M" + /product="membrane glycoprotein precursor M" + /protein_id="NP_740315.1" + CDS 441..713 + /gene="pr" + /note="peptide pr" + /product="protein pr" + /protein_id="YP_009164957.1" + CDS 714..938 + /gene="flavivirus polyprotein gene" + /product="membrane glycoprotein M" + /protein_id="NP_740316.1" + CDS 939..2423 + /gene="E" + /product="envelope protein E" + /protein_id="NP_740317.1" + CDS 2424..3479 + /gene="NS1" + /product="nonstructural protein NS1" + /protein_id="NP_740318.1" + CDS 3480..4133 + /gene="NS2A" + /product="nonstructural protein NS2A" + /protein_id="NP_740319.1" + CDS 4134..4523 + /gene="NS2B" + /product="nonstructural protein NS2B" + /protein_id="NP_740320.1" + CDS 4524..6377 + /gene="NS3" + /product="nonstructural protein NS3" + /protein_id="NP_740321.1" + CDS 6378..6758 + /gene="NS4A" + /product="nonstructural protein NS4A" + /protein_id="NP_740322.1" + CDS 6759..6827 + /gene="2K" + /product="protein 2K" + /protein_id="NP_740323.1" + CDS 6828..7562 + /gene="NS4B" + /product="nonstructural protein NS4B" + /protein_id="NP_740324.1" + CDS 7563..10262 + /gene="NS5" + /product="RNA-dependent RNA polymerase NS5" + /protein_id="NP_740325.1" + 3'UTR 10266..10649 +ORIGIN + 1 agttgttagt ctgtgtggac cgacaaggac agttccaaat cggaagcttg cttaacacag + 61 ttctaacagt ttgtttgaat agagagcaga tctctggaaa aatgaaccaa cgaaaaaagg + 121 tggttagacc acctttcaat atgctgaaac gcgagagaaa ccgcgtatca acccctcaag + 181 ggttggtgaa gagattctca accggacttt tttctgggaa aggaccctta cggatggtgc + 241 tagcattcat cacgtttttg cgagtccttt ccatcccacc aacagcaggg attctgaaga + 301 gatggggaca gttgaagaaa aataaggcca tcaagatact gattggattc aggaaggaga + 361 
taggccgcat gctgaacatc ttgaacggga gaaaaaggtc aacgataaca ttgctgtgct + 421 tgattcccac cgtaatggcg ttttccctca gcacaagaga tggcgaaccc ctcatgatag + 481 tggcaaaaca tgaaaggggg agacctctct tgtttaagac aacagagggg atcaacaaat + 541 gcactctcat tgccatggac ttgggtgaaa tgtgtgagga cactgtcacg tataaatgcc + 601 ccctactggt caataccgaa cctgaagaca ttgattgctg gtgcaacctc acgtctacct + 661 gggtcatgta tgggacatgc acccagagcg gagaacggag acgagagaag cgctcagtag + 721 ctttaacacc acattcagga atgggattgg aaacaagagc tgagacatgg atgtcatcgg + 781 aaggggcttg gaagcatgct cagagagtag agagctggat actcagaaac ccaggattcg + 841 cgctcttggc aggatttatg gcttatatga ttgggcaaac aggaatccag cgaactgtct + 901 tctttgtcct aatgatgctg gtcgccccat cctacggaat gcgatgcgta ggagtaggaa + 961 acagagactt tgtggaagga gtctcaggtg gagcatgggt cgacctggtg ctagaacatg + 1021 gaggatgcgt cacaaccatg gcccagggaa aaccaacctt ggattttgaa ctgactaaga + 1081 caacagccaa ggaagtggct ctgttaagaa cctattgcat tgaagcctca atatcaaaca + 1141 taactacggc aacaagatgt ccaacgcaag gagagcctta tctgaaagag gaacaggacc + 1201 aacagtacat ttgccggaga gatgtggtag acagagggtg gggcaatggc tgtggcttgt + 1261 ttggaaaagg aggagttgtg acatgtgcga agttttcatg ttcggggaag ataacaggca + 1321 atttggtcca aattgagaac cttgaataca cagtggttgt aacagtccac aatggagaca + 1381 cccatgcagt aggaaatgac acatccaatc atggagttac agccatgata actcccaggt + 1441 caccatcggt ggaagtcaaa ttgccggact atggagaact aacactcgat tgtgaaccca + 1501 ggtctggaat tgactttaat gagatgattc tgatgaaaat gaaaaagaaa acatggctcg + 1561 tgcataagca atggtttttg gatctgcctc ttccatggac agcaggagca gacacatcag + 1621 aggttcactg gaattacaaa gagagaatgg tgacatttaa ggttcctcat gccaagagac + 1681 aggatgtgac agtgctggga tctcaggaag gagccatgca ttctgccctc gctggagcca + 1741 cagaagtgga ctccggtgat ggaaatcaca tgtttgcagg acatcttaag tgcaaagtcc + 1801 gtatggagaa attgagaatc aagggaatgt catacacgat gtgttcagga aagttttcaa + 1861 ttgacaaaga gatggcagaa acacagcatg ggacaacagt ggtgaaagtc aagtatgaag + 1921 gtgctggagc tccgtgtaaa gtccccatag agataagaga tgtaaacaag gaaaaagtgg + 1981 ttgggcgtat catctcatcc acccctttgg 
ctgagaatac caacagtgta accaacatag + 2041 aattagaacc cccctttggg gacagctaca tagtgatagg tgttggaaac agcgcattaa + 2101 cactccattg gttcaggaaa gggagttcca ttggcaagat gtttgagtcc acatacagag + 2161 gtgcaaaacg aatggccatt ctaggtgaaa cagcttggga ttttggttcc gttggtggac + 2221 tgttcacatc attgggaaag gctgtgcacc aggtttttgg aagtgtgtat acaaccatgt + 2281 ttggaggagt ctcatggatg attagaatcc taattgggtt cttagtgttg tggattggca + 2341 cgaactcgag gaacacttca atggctatga cgtgcatagc tgttggagga atcactctgt + 2401 ttctgggctt cacagttcaa gcagacatgg gttgtgtggc gtcatggagt gggaaagaat + 2461 tgaagtgtgg aagcggaatt tttgtggttg acaacgtgca cacttggaca gaacagtaca + 2521 aatttcaacc agagtcccca gcgagactag cgtctgcaat attaaatgcc cacaaagatg + 2581 gggtctgtgg aattagatca accacgaggc tggaaaatgt catgtggaag caaataacca + 2641 acgagctaaa ctatgttctc tgggaaggag gacatgacct cactgtagtg gctggggatg + 2701 tgaagggggt gttgaccaaa ggcaagagag cactcacacc cccagtgagt gatctgaaat + 2761 attcatggaa gacatgggga aaagcaaaaa tcttcacccc agaagcaaga aatagcacat + 2821 ttttaataga cggaccagac acctctgaat gccccaatga acgaagagca tggaactctc + 2881 ttgaggtgga agactatgga tttggcatgt tcacgaccaa catatggatg aaattccgag + 2941 aaggaagttc agaagtgtgt gaccacaggt taatgtcagc tgcaattaaa gatcagaaag + 3001 ctgtgcatgc tgacatgggt tattggatag agagctcaaa aaaccagacc tggcagatag + 3061 agaaagcatc tcttattgaa gtgaaaacat gtctgtggcc caagacccac acactgtgga + 3121 gcaatggagt gctggaaagc cagatgctca ttccaaaatc atatgcgggc cctttttcac + 3181 agcacaatta ccgccagggc tatgccacgc aaaccgtggg cccatggcac ttaggcaaat + 3241 tagagataga ctttggagaa tgccccggaa caacagtcac aattcaggag gattgtgacc + 3301 atagaggccc atctttgagg accaccactg catctggaaa actagtcacg caatggtgct + 3361 gccgctcctg cacgatgcct cccttaaggt tcttgggaga agatgggtgc tggtatggga + 3421 tggagattag gcccttgagt gaaaaagaag agaacatggt caaatcacag gtgacggccg + 3481 gacagggcac atcagaaact ttttctatgg gtctgttgtg cctgaccttg tttgtggaag + 3541 aatgcttgag gagaagagtc actaggaaac acatgatatt agttgtggtg atcactcttt + 3601 gtgctatcat cctgggaggc ctcacatgga tggacttact acgagccctc 
atcatgttgg + 3661 gggacactat gtctggtaga ataggaggac agatccacct agccatcatg gcagtgttca + 3721 agatgtcacc aggatacgtg ctgggtgtgt ttttaaggaa actcacttca agagagacag + 3781 cactaatggt aataggaatg gccatgacaa cggtgctttc aattccacat gaccttatgg + 3841 aactcattga tggaatatca ctgggactaa ttttgctaaa aatagtaaca cagtttgaca + 3901 acacccaagt gggaacctta gctctttcct tgactttcat aagatcaaca atgccattgg + 3961 tcatggcttg gaggaccatt atggctgtgt tgtttgtggt cacactcatt cctttgtgca + 4021 ggacaagctg tcttcaaaaa cagtctcatt gggtagaaat aacagcactc atcctaggag + 4081 cccaagctct gccagtgtac ctaatgactc ttatgaaagg agcctcaaga agatcttggc + 4141 ctcttaacga gggcataatg gctgtgggtt tggttagtct cttaggaagc gctcttttaa + 4201 agaatgatgt ccctttagct ggcccaatgg tggcaggagg cttacttctg gcggcttacg + 4261 tgatgagtgg tagctcagca gatctgtcac tagagaaggc cgccaacgtg cagtgggatg + 4321 aaatggcaga cataacaggc tcaagcccaa tcgtagaagt gaagcaggat gaagatggct + 4381 ctttctccat acgggacgtc gaggaaacca atatgataac ccttttggtg aaactggcac + 4441 tgataacagt gtcaggtctc taccccttgg caattccagt cacaatgacc ttatggtaca + 4501 tgtggcaagt gaaaacacaa agatcaggag ccctgtggga cgtcccctca cccgctgcca + 4561 ctaaaaaagc cgcactgtct gaaggagtgt acaggatcat gcaaagaggg ttattcggga + 4621 aaactcaggt tggagtaggg atacacatgg aaggtgtatt tcacacaatg tggcatgtaa + 4681 caagaggatc agtgatctgc cacgagactg ggagattgga gccatcttgg gctgacgtca + 4741 ggaatgacat gatatcatac ggtgggggat ggaggcttgg agacaaatgg gacaaagaag + 4801 aagacgttca ggtcctcgcc atagaaccag gaaaaaatcc taaacatgtc caaacgaaac + 4861 ctggcctttt caagacccta actggagaaa ttggagcagt aacattagat ttcaaacccg + 4921 gaacgtctgg ttctcccatc atcaacagga aaggaaaagt catcggactc tatggaaatg + 4981 gagtagttac caaatcaggt gattacgtca gtgccataac gcaagccgaa agaattggag + 5041 agccagatta tgaagtggat gaggacattt ttcgaaagaa aagattaact ataatggact + 5101 tacaccccgg agctggaaag acaaaaagaa ttcttccatc aatagtgaga gaagccttaa + 5161 aaaggaggct acgaactttg attttagctc ccacgagagt ggtggcggcc gagatggaag + 5221 aggccctacg tggactgcca atccgttatc agaccccagc tgtgaaatca gaacacacag + 5281 gaagagagat 
tgtagacctc atgtgtcatg caaccttcac aacaagactt ttgtcatcaa + 5341 ccagggttcc aaattacaac cttatagtga tggatgaagc acatttcacc gatccttcta + 5401 gtgtcgcggc tagaggatac atctcgacca gggtggaaat gggagaggca gcagccatct + 5461 tcatgaccgc aacccctccc ggagcgacag atccctttcc ccagagcaac agcccaatag + 5521 aagacatcga gagggaaatt ccggaaaggt catggaacac agggttcgac tggataacag + 5581 actaccaagg gaaaactgtg tggtttgttc ccagcataaa agctggaaat gacattgcaa + 5641 attgtttgag aaagtcggga aagaaagtta tccagttgag taggaaaacc tttgatacag + 5701 agtatccaaa aacgaaactc acggactggg actttgtggt cactacagac atatctgaaa + 5761 tgggggccaa ttttagagcc gggagagtga tagaccctag aagatgcctc aagccagtta + 5821 tcctaccaga tgggccagag agagtcattt tagcaggtcc tattccagtg actccagcaa + 5881 gcgctgctca gagaagaggg cgaataggaa ggaacccagc acaagaagac gaccaatacg + 5941 ttttctccgg agacccacta aaaaatgatg aagatcatgc ccactggaca gaagcaaaga + 6001 tgctgcttga caatatctac accccagaag ggatcattcc aacattgttt ggtccggaaa + 6061 gggaaaaaac ccaagccatt gatggagagt ttcgcctcag aggggaacaa aggaagactt + 6121 ttgtggaatt aatgaggaga ggagaccttc cggtgtggct gagctataag gtagcttctg + 6181 ctggcatttc ttacgaagat cgggaatggt gcttcacagg ggaaagaaat aaccaaattt + 6241 tagaagaaaa catggaggtt gaaatttgga ctagagaggg agaaaagaaa aagctaaggc + 6301 caagatggtt agatgcacgt gtatacgctg accccatggc tttgaaggat ttcaaggagt + 6361 ttgccagtgg aaggaagagt ataactctcg acatcctaac agagattgcc agtttgccaa + 6421 cttacctttc ctctagggcc aagctcgccc ttgataacat agtcatgctc cacacaacag + 6481 aaagaggagg gagggcctat caacacgccc tgaacgaact tccggagtca ctggaaacac + 6541 tcatgcttgt agctttacta ggtgctatga cagcaggcat cttcctgttt ttcatgcaag + 6601 ggaaaggaat agggaaattg tcaatgggtt tgataaccat tgcggtggct agtggcttgc + 6661 tctgggtagc agaaattcaa ccccagtgga tagcggcctc aatcatacta gagttttttc + 6721 tcatggtact gttgataccg gaaccagaaa aacaaaggac cccacaagac aatcaattga + 6781 tctacgtcat attgaccatt ctcaccatca ttggtctaat agcagccaac gagatggggc + 6841 tgattgaaaa aacaaaaacg gattttgggt tttaccaggt aaaaacagaa accaccatcc + 6901 tcgatgtgga cttgagacca gcttcagcat 
ggacgctcta tgcagtagcc accacaattc + 6961 tgactcccat gctgagacac accatagaaa acacgtcggc caacctatct ctagcagcca + 7021 ttgccaacca ggcagccgtc ctaatggggc ttggaaaagg atggccgctc cacagaatgg + 7081 acctcggtgt gccgctgtta gcaatgggat gctattctca agtgaaccca acaaccttga + 7141 cagcatcctt agtcatgctt ttagtccatt atgcaataat aggcccagga ttgcaggcaa + 7201 aagccacaag agaggcccag aaaaggacag ctgctgggat catgaaaaat cccacagtgg + 7261 acgggataac agtaatagat ctagaaccaa tatcctatga cccaaaattt gaaaagcaat + 7321 tagggcaggt catgctacta gtcttgtgtg ctggacaact actcttgatg agaacaacat + 7381 gggctttctg tgaagtcttg actttggcca caggaccaat cttgaccttg tgggagggca + 7441 acccgggaag gttttggaac acgaccatag ccgtatccac cgccaacatt ttcaggggaa + 7501 gttacttggc gggagctgga ctggcttttt cactcataaa gaatgcacaa acccctagga + 7561 ggggaactgg gaccacagga gagacactgg gagagaagtg gaagagacag ctaaactcat + 7621 tagacagaaa agagtttgaa gagtataaaa gaagtggaat actagaagtg gacaggactg + 7681 aagccaagtc tgccctgaaa gatgggtcta aaatcaagca tgcagtatca agagggtcca + 7741 gtaagatcag atggattgtt gagagaggga tggtaaagcc aaaagggaaa gttgtagatc + 7801 ttggctgtgg gagaggagga tggtcttatt acatggcgac actcaagaac gtgactgaag + 7861 tgaaagggta tacaaaagga ggtccaggac atgaagaacc gattcccatg gctacttatg + 7921 gttggaattt ggtcaaactc cattcagggg ttgacgtgtt ctacaaaccc acagagcaag + 7981 tggacaccct gctctgtgat attggggagt catcttctaa tccaacaata gaggaaggaa + 8041 gaacattaag agttttgaag atggtggagc catggctctc ttcaaaacct gaattctgca + 8101 tcaaagtcct taacccctac atgccaacag tcatagaaga gctggagaaa ctgcagagaa + 8161 aacatggtgg gaaccttgtc agatgcccgc tgtccaggaa ctccacccat gagatgtatt + 8221 gggtgtcagg agcgtcggga aacattgtga gctctgtgaa cacaacatca aagatgttgt + 8281 tgaacaggtt cacaacaagg cataggaaac ccacttatga gaaggacgta gatcttgggg + 8341 caggaacgag aagtgtctcc actgaaacag aaaaaccaga catgacaatc attgggagaa + 8401 ggcttcagcg attgcaagaa gagcacaaag aaacctggca ttatgatcag gaaaacccat + 8461 acagaacctg ggcgtatcat ggaagctatg aagctccttc gacaggctct gcatcctcca + 8521 tggtgaacgg ggtggtaaaa ctgctaacaa aaccctggga tgtgattcca 
atggtgactc + 8581 agttagccat gacagataca accccttttg ggcaacaaag agtgttcaaa gagaaggtgg + 8641 ataccagaac accacaacca aaacccggta cacgaatggt tatgaccacg acagccaatt + 8701 ggctgtgggc cctccttgga aagaagaaaa atcccagact gtgcacaagg gaagagttca + 8761 tctcaaaagt tagatcaaac gcagccatag gcgcagtctt tcaggaagaa cagggatgga + 8821 catcagccag tgaagctgtg aatgacagcc ggttttggga actggttgac aaagaaaggg + 8881 ccctacacca ggaagggaaa tgtgaatcgt gtgtctataa catgatggga aaacgtgaga + 8941 aaaagttagg agagtttggc agagccaagg gaagccgagc aatctggtac atgtggctgg + 9001 gagcgcggtt tctggaattt gaagccctgg gttttttgaa tgaagatcac tggtttggca + 9061 gagaaaattc atggagtgga gtggaagggg aaggtctgca cagattggga tatatcctgg + 9121 aggagataga caagaaggat ggagacctaa tgtatgctga tgacacagca ggctgggaca + 9181 caagaatcac tgaggatgac cttcaaaatg aggaactgat cacggaacag atggctcccc + 9241 accacaagat cctagccaaa gccattttca aactaaccta tcaaaacaaa gtggtgaaag + 9301 tcctcagacc cacaccgcgg ggagcggtga tggatatcat atccaggaaa gaccaaagag + 9361 gtagtggaca agttggaaca tatggtttga acacattcac caacatggaa gttcaactca + 9421 tccgccaaat ggaagctgaa ggagtcatca cacaagatga catgcagaac ccaaaagggt + 9481 tgaaagaaag agttgagaaa tggctgaaag agtgtggtgt cgacaggtta aagaggatgg + 9541 caatcagtgg agacgattgc gtggtgaagc ccctagatga gaggtttggc acttccctcc + 9601 tcttcttgaa cgacatggga aaggtgagga aagacattcc gcagtgggaa ccatctaagg + 9661 gatggaaaaa ctggcaagag gttccttttt gctcccacca ctttcacaag atctttatga + 9721 aggatggccg ctcactagtt gttccatgta gaaaccagga tgaactgata gggagagcca + 9781 gaatctcgca gggagctgga tggagcttaa gagaaacagc ctgcctgggc aaagcttacg + 9841 cccagatgtg gtcgcttatg tacttccaca gaagggatct gcgtttagcc tccatggcca + 9901 tatgctcagc agttccaacg gaatggtttc caacaagcag aacaacatgg tcaatccacg + 9961 ctcatcacca gtggatgacc actgaagata tgctcaaagt gtggaacaga gtgtggatag + 10021 aagacaaccc taatatgact gacaagactc cagtccattc gtgggaagat ataccttacc + 10081 tagggaaaag agaggatttg tggtgtggat ccctgattgg actttcttcc agagccacct + 10141 gggcgaagaa cattcatacg gccataaccc aggtcaggaa cctgatcgga aaagaggaat + 10201 
acgtggatta catgccagta atgaaaagat acagtgctcc ttcagagagt gaaggagttc + 10261 tgtaattacc aacaacaaac accaaaggct attgaagtca ggccacttgt gccacggttt + 10321 gagcaaaccg tgctgcctgt agctccgcca ataatgggag gcgtaataat ccccagggag + 10381 gccatgcgcc acggaagctg tacgcgtggc atattggact agcggttaga ggagacccct + 10441 cccatcactg ataaaacgca gcaaaagggg gcccgaagcc aggaggaagc tgtactcctg + 10501 gtggaaggac tagaggttag aggagacccc cccaacacaa aaacagcata ttgacgctgg + 10561 gaaagaccag agatcctgct gtctctgcaa catcaatcca ggcacagagc gccgcaagat + 10621 ggattggtgt tgttgatcca acaggttct +// diff --git a/config/reference_dengue_denv1.gb b/config/reference_dengue_denv1.gb new file mode 100644 index 00000000..e71dac21 --- /dev/null +++ b/config/reference_dengue_denv1.gb @@ -0,0 +1,294 @@ +LOCUS DENV1/NAURUISLAND/REFERENCE/1997 10735 bp DNA VRL 15-SEP-2015 +DEFINITION Dengue virus 1, complete genome. +ACCESSION NC_001477 +VERSION NC_001477.1 +DBLINK BioProject:PRJNA15306 +KEYWORDS RefSeq. +SOURCE Dengue virus 1 + ORGANISM Dengue virus 1 + Viruses; ssRNA viruses; ssRNA positive-strand viruses, no DNA stage; + Flaviviridae; Flavivirus; Dengue virus group. +REFERENCE 1 (bases 1 to 10735) + AUTHORS Puri,B., Nelson,W.M., Henchal,E.A., Hoke,C.H., Eckels,K.H., + Dubois,D.R., Porter,K.R. and Hayes,C.G. + TITLE Molecular analysis of dengue virus attenuation after serial passage + in primary dog kidney cells + JOURNAL J. Gen. Virol. 78 (PT 9), 2287-2291 (1997) + PUBMED 9292016 +REFERENCE 2 (bases 1 to 10735) + AUTHORS McKee,K.T. Jr., Bancroft,W.H., Eckels,K.H., Redfield,R.R., + Summers,P.L. and Russell,P.K. + TITLE Lack of attenuation of a candidate dengue 1 vaccine (45AZ5) in human + volunteers + JOURNAL Am. J. Trop. Med. Hyg. 36 (2), 435-442 (1987) + PUBMED 3826504 +REFERENCE 3 (bases 1 to 10735) + CONSRTM NCBI Genome Project + TITLE Direct Submission + JOURNAL Submitted (01-AUG-2000) National Center for Biotechnology + Information, NIH, Bethesda, MD 20894, USA +REFERENCE 4 (bases 1 to 10735) + AUTHORS Puri,B. 
and Nelson,W.M. + TITLE Direct Submission + JOURNAL Submitted (05-FEB-1997) Inf. Dis. Dept, Naval Medical Research + Institute, 8901 Wisconsin Ave., Bethesda, MD 20889-5607, USA +COMMENT VALIDATED REFSEQ: This record has undergone validation or + preliminary review. The reference sequence was derived from U88536. + COMPLETENESS: full length. +FEATURES Location/Qualifiers + source 1..10735 + /clone="45AZ5" + /db_xref="taxon:11053" + /mol_type="genomic RNA" + /organism="Dengue virus 1" + /type="1" + 5'UTR 1..94 + gene 95..10273 + /db_xref="GeneID:5075725" + /gene="flavivirus polyprotein gene" + CDS 95..436 + /db_xref="VBRC:35735" + /gene="C" + /product="anchored capsid protein C" + /protein_id="NP_722457.2" + CDS 437..934 + /gene="M" + /product="membrane glycoprotein precursor M" + /protein_id="NP_733807.2" + CDS 437..709 + /gene="pr" + /note="peptide pr" + /product="protein pr" + /protein_id="YP_009164956.1" + CDS 710..934 + /gene="flavivirus polyprotein gene" + /product="membrane glycoprotein M" + /protein_id="NP_722459.2" + CDS 935..2419 + /gene="E" + /product="envelope protein E" + /protein_id="NP_722460.2" + CDS 2420..3475 + /db_xref="VBRC:35739" + /gene="NS1" + /product="nonstructural protein NS1" + /protein_id="NP_722461.1" + CDS 3476..4129 + /db_xref="VBRC:35740" + /gene="NS2A" + /product="nonstructural protein NS2A" + /protein_id="NP_733808.1" + CDS 4130..4519 + /db_xref="VBRC:35741" + /gene="NS2B" + /product="nonstructural protein NS2B" + /protein_id="NP_733809.1" + CDS 4520..6376 + /db_xref="VBRC:35742" + /gene="NS3" + /product="nonstructural protein NS3" + /protein_id="NP_722463.1" + CDS 6377..6757 + /db_xref="VBRC:35743" + /gene="NS4A" + /product="nonstructural protein NS4A" + /protein_id="NP_733810.1" + CDS 6758..6826 + /db_xref="VBRC:67794" + /gene="2K" + /note="added by NCBI staff following more recent + annotations of this virus sequence" + /product="protein 2K" + /protein_id="NP_722467.1" + CDS 6827..7573 + /db_xref="VBRC:35744" + /gene="NS4B" + 
/product="nonstructural protein NS4B" + /protein_id="NP_733811.1" + CDS 7574..10270 + /db_xref="VBRC:35745" + /gene="NS5" + /product="RNA-dependent RNA polymerase NS5" + /protein_id="NP_722465.1" + 3'UTR 10274..10735 +ORIGIN + 1 agttgttagt ctacgtggac cgacaagaac agtttcgaat cggaagcttg cttaacgtag + 61 ttctaacagt tttttattag agagcagatc tctgatgaac aaccaacgga aaaagacggg + 121 tcgaccgtct ttcaatatgc tgaaacgcgc gagaaaccgc gtgtcaactg tttcacagtt + 181 ggcgaagaga ttctcaaaag gattgctttc aggccaagga cccatgaaat tggtgatggc + 241 ttttatagca ttcctaagat ttctagccat acctccaaca gcaggaattt tggctagatg + 301 gggctcattc aagaagaatg gagcgatcaa agtgttacgg ggtttcaaga aagaaatctc + 361 aaacatgttg aacataatga acaggaggaa aagatctgtg accatgctcc tcatgctgct + 421 gcccacagcc ctggcgttcc atctgaccac ccgaggggga gagccgcaca tgatagttag + 481 caagcaggaa agaggaaaat cacttttgtt taagacctct gcaggtgtca acatgtgcac + 541 ccttattgca atggatttgg gagagttatg tgaggacaca atgacctaca aatgcccccg + 601 gatcactgag acggaaccag atgacgttga ctgttggtgc aatgccacgg agacatgggt + 661 gacctatgga acatgttctc aaactggtga acaccgacga gacaaacgtt ccgtcgcact + 721 ggcaccacac gtagggcttg gtctagaaac aagaaccgaa acgtggatgt cctctgaagg + 781 cgcttggaaa caaatacaaa aagtggagac ctgggctctg agacacccag gattcacggt + 841 gatagccctt tttctagcac atgccatagg aacatccatc acccagaaag ggatcatttt + 901 tattttgctg atgctggtaa ctccatccat ggccatgcgg tgcgtgggaa taggcaacag + 961 agacttcgtg gaaggactgt caggagctac gtgggtggat gtggtactgg agcatggaag + 1021 ttgcgtcact accatggcaa aagacaaacc aacactggac attgaactct tgaagacgga + 1081 ggtcacaaac cctgccgtcc tgcgcaaact gtgcattgaa gctaaaatat caaacaccac + 1141 caccgattcg agatgtccaa cacaaggaga agccacgctg gtggaagaac aggacacgaa + 1201 ctttgtgtgt cgacgaacgt tcgtggacag aggctggggc aatggttgtg ggctattcgg + 1261 aaaaggtagc ttaataacgt gtgctaagtt taagtgtgtg acaaaactgg aaggaaagat + 1321 agtccaatat gaaaacttaa aatattcagt gatagtcacc gtacacactg gagaccagca + 1381 ccaagttgga aatgagacca cagaacatgg aacaactgca accataacac ctcaagctcc + 1441 cacgtcggaa atacagctga cagactacgg 
agctctaaca ttggattgtt cacctagaac + 1501 agggctagac tttaatgaga tggtgttgtt gacaatgaaa aaaaaatcat ggctcgtcca + 1561 caaacaatgg tttctagact taccactgcc ttggacctcg ggggcttcaa catcccaaga + 1621 gacttggaat agacaagact tgctggtcac atttaagaca gctcatgcaa aaaagcagga + 1681 agtagtcgta ctaggatcac aagaaggagc aatgcacact gcgttgactg gagcgacaga + 1741 aatccaaacg tctggaacga caacaatttt tgcaggacac ctgaaatgca gattaaaaat + 1801 ggataaactg attttaaaag ggatgtcata tgtaatgtgc acagggtcat tcaagttaga + 1861 gaaggaagtg gctgagaccc agcatggaac tgttctagtg caggttaaat acgaaggaac + 1921 agatgcacca tgcaagatcc ccttctcgtc ccaagatgag aagggagtaa cccagaatgg + 1981 gagattgata acagccaacc ccatagtcac tgacaaagaa aaaccagtca acattgaagc + 2041 ggagccacct tttggtgaga gctacattgt ggtaggagca ggtgaaaaag ctttgaaact + 2101 aagctggttc aagaagggaa gcagtatagg gaaaatgttt gaagcaactg cccgtggagc + 2161 acgaaggatg gccatcctgg gagacactgc atgggacttc ggttctatag gaggggtgtt + 2221 cacgtctgtg ggaaaactga tacaccagat ttttgggact gcgtatggag ttttgttcag + 2281 cggtgtttct tggaccatga agataggaat agggattctg ctgacatggc taggattaaa + 2341 ctcaaggagc acgtcccttt caatgacgtg tatcgcagtt ggcatggtca cactgtacct + 2401 aggagtcatg gttcaggcgg actcgggatg tgtaatcaac tggaaaggca gagaactcaa + 2461 atgtggaagc ggcatttttg tcaccaatga agtccacacc tggacagagc aatataaatt + 2521 ccaggccgac tcccctaaga gactatcagc ggccattggg aaggcatggg aggagggtgt + 2581 gtgtggaatt cgatcagcca ctcgtctcga gaacatcatg tggaagcaaa tatcaaatga + 2641 attaaaccac atcttacttg aaaatgacat gaaatttaca gtggtcgtag gagacgttag + 2701 tggaatcttg gcccaaggaa agaaaatgat taggccacaa cccatggaac acaaatactc + 2761 gtggaaaagc tggggaaaag ccaaaatcat aggagcagat gtacagaata ccaccttcat + 2821 catcgacggc ccaaacaccc cagaatgccc tgataaccaa agagcatgga acatttggga + 2881 agttgaagac tatggatttg gaattttcac gacaaacata tggttgaaat tgcgtgactc + 2941 ctacactcaa gtgtgtgacc accggctaat gtcagctgcc atcaaggata gcaaagcagt + 3001 ccatgctgac atggggtact ggatagaaag tgaaaagaac gagacttgga agttggcaag + 3061 agcctccttc atagaagtta agacatgcat ctggccaaaa tcccacactc 
tatggagcaa + 3121 tggagtcctg gaaagtgaga tgataatccc aaagatatat ggaggaccaa tatctcagca + 3181 caactacaga ccaggatatt tcacacaaac agcagggccg tggcacttgg gcaagttaga + 3241 actagatttt gatttatgtg aaggtaccac tgttgttgtg gatgaacatt gtggaaatcg + 3301 aggaccatct cttagaacca caacagtcac aggaaagaca atccatgaat ggtgctgtag + 3361 atcttgcacg ttaccccccc tacgtttcaa aggagaagac gggtgctggt acggcatgga + 3421 aatcagacca gtcaaggaga aggaagagaa cctagttaag tcaatggtct ctgcagggtc + 3481 aggagaagtg gacagttttt cactaggact gctatgcata tcaataatga tcgaagaggt + 3541 aatgagatcc agatggagca gaaaaatgct gatgactgga acattggctg tgttcctcct + 3601 tctcacaatg ggacaattga catggaatga tctgatcagg ctatgtatca tggttggagc + 3661 caacgcttca gacaagatgg ggatgggaac aacgtaccta gctttgatgg ccactttcag + 3721 aatgagacca atgttcgcag tcgggctact gtttcgcaga ttaacatcta gagaagttct + 3781 tcttcttaca gttggattga gtctggtggc atctgtagaa ctaccaaatt ccttagagga + 3841 gctaggggat ggacttgcaa tgggcatcat gatgttgaaa ttactgactg attttcagtc + 3901 acatcagcta tgggctacct tgctgtcttt aacatttgtc aaaacaactt tttcattgca + 3961 ctatgcatgg aagacaatgg ctatgatact gtcaattgta tctctcttcc ctttatgcct + 4021 gtccacgact tctcaaaaaa caacatggct tccggtgttg ctgggatctc ttggatgcaa + 4081 accactaacc atgtttctta taacagaaaa caaaatctgg ggaaggaaaa gctggcctct + 4141 caatgaagga attatggctg ttggaatagt tagcattctt ctaagttcac ttctcaagaa + 4201 tgatgtgcca ctagctggcc cactaatagc tggaggcatg ctaatagcat gttatgtcat + 4261 atctggaagc tcggccgatt tatcactgga gaaagcggct gaggtctcct gggaagaaga + 4321 agcagaacac tctggtgcct cacacaacat actagtggag gtccaagatg atggaaccat + 4381 gaagataaag gatgaagaga gagatgacac actcaccatt ctcctcaaag caactctgct + 4441 agcaatctca ggggtatacc caatgtcaat accggcgacc ctctttgtgt ggtatttttg + 4501 gcagaaaaag aaacagagat caggagtgct atgggacaca cccagccctc cagaagtgga + 4561 aagagcagtc cttgatgatg gcatttatag aattctccaa agaggattgt tgggcaggtc + 4621 tcaagtagga gtaggagttt ttcaagaagg cgtgttccac acaatgtggc acgtcaccag + 4681 gggagctgtc ctcatgtacc aagggaagag actggaacca agttgggcca gtgtcaaaaa + 4741 agacttgatc 
tcatatggag gaggttggag gtttcaagga tcctggaacg cgggagaaga + 4801 agtgcaggtg attgctgttg aaccggggaa gaaccccaaa aatgtacaga cagcgccggg + 4861 taccttcaag acccctgaag gcgaagttgg agccatagct ctagacttta aacccggcac + 4921 atctggatct cctatcgtga acagagaggg aaaaatagta ggtctttatg gaaatggagt + 4981 ggtgacaaca agtggtacct acgtcagtgc catagctcaa gctaaagcat cacaagaagg + 5041 gcctctacca gagattgagg acgaggtgtt taggaaaaga aacttaacaa taatggacct + 5101 acatccagga tcgggaaaaa caagaagata ccttccagcc atagtccgtg aggccataaa + 5161 aagaaagctg cgcacgctag tcttagctcc cacaagagtt gtcgcttctg aaatggcaga + 5221 ggcgctcaag ggaatgccaa taaggtatca gacaacagca gtgaagagtg aacacacggg + 5281 aaaggagata gttgacctta tgtgtcacgc cactttcact atgcgtctcc tgtctcctgt + 5341 gagagttccc aattataata tgattatcat ggatgaagca cattttaccg atccagccag + 5401 catagcagcc agagggtata tctcaacccg agtgggtatg ggtgaagcag ctgcgatttt + 5461 catgacagcc actccccccg gatcggtgga ggcctttcca cagagcaatg cagttatcca + 5521 agatgaggaa agagacattc ctgaaagatc atggaactca ggctatgact ggatcactga + 5581 tttcccaggt aaaacagtct ggtttgttcc aagcatcaaa tcaggaaatg acattgccaa + 5641 ctgtttaaga aagaatggga aacgggtggt ccaattgagc agaaaaactt ttgacactga + 5701 gtaccagaaa acaaaaaata acgactggga ctatgttgtc acaacagaca tatccgaaat + 5761 gggagcaaac ttccgagccg acagggtaat agacccgagg cggtgcctga aaccggtaat + 5821 actaaaagat ggcccagagc gtgtcattct agccggaccg atgccagtga ctgtggctag + 5881 cgccgcccag aggagaggaa gaattggaag gaaccaaaat aaggaaggcg atcagtatat + 5941 ttacatggga cagcctctaa acaatgatga ggaccacgcc cattggacag aagcaaaaat + 6001 gctccttgac aacataaaca caccagaagg gattatccca gccctctttg agccggagag + 6061 agaaaagagt gcagcaatag acggggaata cagactacgg ggtgaagcga ggaaaacgtt + 6121 cgtggagctc atgagaagag gagatctacc tgtctggcta tcctacaaag ttgcctcaga + 6181 aggcttccag tactccgaca gaaggtggtg ctttgatggg gaaaggaaca accaggtgtt + 6241 ggaggagaac atggacgtgg agatctggac aaaagaagga gaaagaaaga aactacgacc + 6301 ccgctggctg gatgccagaa catactctga cccactggct ctgcgcgaat tcaaagagtt + 6361 cgcagcagga agaagaagcg tctcaggtga 
cctaatatta gaaataggga aacttccaca + 6421 acatttaacg caaagggccc agaacgcctt ggacaatctg gttatgttgc acaactctga + 6481 acaaggagga aaagcctata gacacgccat ggaagaacta ccagacacca tagaaacgtt + 6541 aatgctccta gctttgatag ctgtgctgac tggtggagtg acgttgttct tcctatcagg + 6601 aaggggtcta ggaaaaacat ccattggcct actctgcgtg attgcctcaa gtgcactgtt + 6661 atggatggcc agtgtggaac cccattggat agcggcctct atcatactgg agttctttct + 6721 gatggtgttg cttattccag agccggacag acagcgcact ccacaagaca accagctagc + 6781 atacgtggtg ataggtctgt tattcatgat attgacagtg gcagccaatg agatgggatt + 6841 actggaaacc acaaagaagg acctggggat tggtcatgca gctgctgaaa accaccatca + 6901 tgctgcaatg ctggacgtag acctacatcc agcttcagcc tggactctct atgcagtggc + 6961 cacaacaatt atcactccca tgatgagaca cacaattgaa aacacaacgg caaatatttc + 7021 cctgacagct attgcaaacc aggcagctat attgatggga cttgacaagg gatggccaat + 7081 atcaaagatg gacataggag ttccacttct cgccttgggg tgctattctc aggtgaaccc + 7141 gctgacgctg acagcggcgg tattgatgct agtggctcat tatgccataa ttggacccgg + 7201 actgcaagca aaagctacta gagaagctca aaaaaggaca gcagccggaa taatgaaaaa + 7261 cccaactgtc gacgggatcg ttgcaataga tttggaccct gtggtttacg atgcaaaatt + 7321 tgaaaaacag ctaggccaaa taatgttgtt gatactttgc acatcacaga tcctcctgat + 7381 gcggaccaca tgggccttgt gtgaatccat cacactagcc actggacctc tgactacgct + 7441 ttgggaggga tctccaggaa aattctggaa caccacgata gcggtgtcca tggcaaacat + 7501 ttttagggga agttatctag caggagcagg tctggccttt tcattaatga aatctctagg + 7561 aggaggtagg agaggcacgg gagcccaagg ggaaacactg ggagaaaaat ggaaaagaca + 7621 gctaaaccaa ttgagcaagt cagaattcaa cacttacaaa aggagtggga ttatagaggt + 7681 ggatagatct gaagccaaag aggggttaaa aagaggagaa acgactaaac acgcagtgtc + 7741 gagaggaacg gccaaactga ggtggtttgt ggagaggaac cttgtgaaac cagaagggaa + 7801 agtcatagac ctcggttgtg gaagaggtgg ctggtcatat tattgcgctg ggctgaagaa + 7861 agtcacagaa gtgaaaggat acacgaaagg aggacctgga catgaggaac caatcccaat + 7921 ggcaacctat ggatggaacc tagtaaagct atactccggg aaagatgtat tctttacacc + 7981 acctgagaaa tgtgacaccc tcttgtgtga tattggtgag tcctctccga 
acccaactat + 8041 agaagaagga agaacgttac gtgttctaaa gatggtggaa ccatggctca gaggaaacca + 8101 attttgcata aaaattctaa atccctatat gccgagtgtg gtagaaactt tggagcaaat + 8161 gcaaagaaaa catggaggaa tgctagtgcg aaatccactc tcaagaaact ccactcatga + 8221 aatgtactgg gtttcatgtg gaacaggaaa cattgtgtca gcagtaaaca tgacatctag + 8281 aatgctgcta aatcgattca caatggctca caggaagcca acatatgaaa gagacgtgga + 8341 cttaggcgct ggaacaagac atgtggcagt agaaccagag gtggccaacc tagatatcat + 8401 tggccagagg atagagaata taaaaaatga acacaaatca acatggcatt atgatgagga + 8461 caatccatac aaaacatggg cctatcatgg atcatatgag gtcaagccat caggatcagc + 8521 ctcatccatg gtcaatggtg tggtgagact gctaaccaaa ccatgggatg tcattcccat + 8581 ggtcacacaa atagccatga ctgacaccac accctttgga caacagaggg tgtttaaaga + 8641 gaaagttgac acgcgtacac caaaagcgaa acgaggcaca gcacaaatta tggaggtgac + 8701 agccaggtgg ttatggggtt ttctctctag aaacaaaaaa cccagaatct gcacaagaga + 8761 ggagttcaca agaaaagtca ggtcaaacgc agctattgga gcagtgttcg ttgatgaaaa + 8821 tcaatggaac tcagcaaaag aggcagtgga agatgaacgg ttctgggacc ttgtgcacag + 8881 agagagggag cttcataaac aaggaaaatg tgccacgtgt gtctacaaca tgatgggaaa + 8941 gagagagaaa aaattaggag agttcggaaa ggcaaaagga agtcgcgcaa tatggtacat + 9001 gtggttggga gcgcgctttt tagagtttga agcccttggt ttcatgaatg aagatcactg + 9061 gttcagcaga gagaattcac tcagtggagt ggaaggagaa ggactccaca aacttggata + 9121 catactcaga gacatatcaa agattccagg gggaaatatg tatgcagatg acacagccgg + 9181 atgggacaca agaataacag aggatgatct tcagaatgag gccaaaatca ctgacatcat + 9241 ggaacctgaa catgccctat tggccacgtc aatctttaag ctaacctacc aaaacaaggt + 9301 agtaagggtg cagagaccag cgaaaaatgg aaccgtgatg gatgtcatat ccagacgtga + 9361 ccagagagga agtggacagg ttggaaccta tggcttaaac accttcacca acatggaggc + 9421 ccaactaata agacaaatgg agtctgaggg aatcttttca cccagcgaat tggaaacccc + 9481 aaatctagcc gaaagagtcc tcgactggtt gaaaaaacat ggcaccgaga ggctgaaaag + 9541 aatggcaatc agtggagatg actgtgtggt gaaaccaatc gatgacagat ttgcaacagc + 9601 cttaacagct ttgaatgaca tgggaaaggt aagaaaagac ataccgcaat gggaaccttc + 9661 aaaaggatgg 
aatgattggc aacaagtgcc tttctgttca caccatttcc accagctgat + 9721 tatgaaggat gggagggaga tagtggtgcc atgccgcaac caagatgaac ttgtaggtag + 9781 ggccagagta tcacaaggcg ccggatggag cttgagagaa actgcatgcc taggcaagtc + 9841 atatgcacaa atgtggcagc tgatgtactt ccacaggaga gacttgagat tagcggctaa + 9901 tgctatctgt tcagccgttc cagttgattg ggtcccaacc agccgcacca cctggtcgat + 9961 ccatgcccac catcaatgga tgacaacaga agacatgttg tcagtgtgga atagggtttg + 10021 gatagaggaa aacccatgga tggaggacaa gactcatgtg tccagttggg aagacgttcc + 10081 atacctagga aaaagggaag atcaatggtg tggttcccta ataggcttaa cagcacgagc + 10141 cacctgggcc accaacatac aagtggccat aaaccaagtg agaaggctca ttgggaatga + 10201 gaattatcta gacttcatga catcaatgaa gagattcaaa aacgagagtg atcccgaagg + 10261 ggcactctgg taagccaact cattcacaaa ataaaggaaa ataaaaaatc aaacaaggca + 10321 agaagtcagg ccggattaag ccatagcacg gtaagagcta tgctgcctgt gagccccgtc + 10381 caaggacgta aaatgaagtc aggccgaaag ccacggttcg agcaagccgt gctgcctgta + 10441 gctccatcgt ggggatgtaa aaacccggga ggctgcaaac catggaagct gtacgcatgg + 10501 ggtagcagac tagtggttag aggagacccc tcccaagaca caacgcagca gcggggccca + 10561 acaccagggg aagctgtacc ctggtggtaa ggactagagg ttagaggaga ccccccgcac + 10621 aacaacaaac agcatattga cgctgggaga gaccagagat cctgctgtct ctacagcatc + 10681 attccaggca cagaacgcca aaaaatggaa tggtgctgtt gaatcaacag gttct +// diff --git a/config/reference_dengue_denv2.gb b/config/reference_dengue_denv2.gb new file mode 100644 index 00000000..890881dd --- /dev/null +++ b/config/reference_dengue_denv2.gb @@ -0,0 +1,301 @@ +LOCUS DENV2/THAILAND/REFERENCE/1964 10723 bp DNA VRL 15-SEP-2015 +DEFINITION Dengue virus 2, complete genome. +ACCESSION NC_001474 +VERSION NC_001474.2 +DBLINK BioProject:PRJNA20183 +KEYWORDS RefSeq. +SOURCE Dengue virus 2 + ORGANISM Dengue virus 2 + Viruses; ssRNA viruses; ssRNA positive-strand viruses, no DNA stage; + Flaviviridae; Flavivirus; Dengue virus group. 
+REFERENCE 1 (bases 1 to 10723) + AUTHORS Kinney,R.M., Butrapet,S., Chang,G.J., Tsuchiya,K.R., Roehrig,J.T., + Bhamarapravati,N. and Gubler,D.J. + TITLE Construction of infectious cDNA clones for dengue 2 virus: strain + 16681 and its attenuated vaccine derivative, strain PDK-53 + JOURNAL Virology 230 (2), 300-308 (1997) + PUBMED 9143286 +REFERENCE 2 (bases 1 to 10723) + CONSRTM NCBI Genome Project + TITLE Direct Submission + JOURNAL Submitted (01-NOV-2007) National Center for Biotechnology + Information, NIH, Bethesda, MD 20894, USA +REFERENCE 3 (bases 1 to 10723) + AUTHORS Kinney,R.M., Butrapet,S., Chang,G.J., Tsuchiya,K.R., Roehrig,J.T., + Bhamarapravati,N. and Gubler,D.J. + TITLE Direct Submission + JOURNAL Submitted (28-JAN-1997) Division of Vector-Borne Infectious + Diseases, National Center for Infectious Diseases, Centers for + Disease Control and Prevention, Public Health Service, U.S. + Department of Health and Human Services, P.O. Box 2087, Fort + Collins, CO 80522, USA +COMMENT REVIEWED REFSEQ: This record has been curated by NCBI staff. The + reference sequence was derived from U87411. + On Nov 1, 2007 this sequence version replaced gi:9626681. + The mature peptides were added by the NCBI staff following other + annotations for Dengue virus with the kind help of Dr. Vladimir + Yamshchikov (Southern Research Institute, Birmingham, AL USA). + COMPLETENESS: full length. 
+FEATURES Location/Qualifiers + source 1..10723 + /collection_date="1964" + /country="Thailand" + /db_xref="taxon:11060" + /mol_type="genomic RNA" + /organism="Dengue virus 2" + /strain="16681" + 5'UTR 1..96 + gene 97..10272 + /db_xref="GeneID:1494449" + /gene="flavivirus polyprotein gene" + CDS 97..438 + /db_xref="VBRC:35917" + /gene="C" + /product="anchored capsid protein C" + /protein_id="NP_739581.2" + CDS 439..936 + /db_xref="VBRC:35919" + /gene="M" + /product="membrane glycoprotein precursor M" + /protein_id="NP_739582.2" + CDS 439..711 + /gene="pr" + /note="peptide pr" + /product="protein pr" + /protein_id="YP_009164954.1" + CDS 712..936 + /db_xref="VBRC:35920" + /gene="flavivirus polyprotein gene" + /product="membrane glycoprotein M" + /protein_id="NP_739592.2" + CDS 937..2421 + /db_xref="VBRC:35921" + /gene="E" + /product="envelope protein E" + /protein_id="NP_739583.2" + CDS 2422..3477 + /db_xref="VBRC:35922" + /gene="NS1" + /product="nonstructural protein NS1" + /protein_id="NP_739584.2" + CDS 3478..4131 + /db_xref="VBRC:35923" + /gene="NS2A" + /product="nonstructural protein NS2A" + /protein_id="NP_739585.2" + CDS 4132..4521 + /db_xref="VBRC:35924" + /gene="NS2B" + /product="nonstructural protein NS2B" + /protein_id="NP_739586.2" + CDS 4522..6375 + /db_xref="VBRC:35925" + /gene="NS3" + /note="ATPase; component of capping enzyme (RNA + thriphosphatase); protease; RNA-helicase" + /product="nonstructural protein NS3" + /protein_id="NP_739587.2" + CDS 6376..6756 + /db_xref="VBRC:35926" + /gene="NS4A" + /product="nonstructural protein NS4A" + /protein_id="NP_739588.2" + CDS 6757..6825 + /db_xref="VBRC:35927" + /gene="2K" + /product="protein 2K" + /protein_id="NP_739593.2" + CDS 6826..7569 + /db_xref="VBRC:35928" + /gene="NS4B" + /product="nonstructural protein NS4B" + /protein_id="NP_739589.2" + CDS 7570..10269 + /db_xref="VBRC:35929" + /gene="NS5" + /note="methyltransferase component of capping enzyme; + nonstructural protein NS5" + /product="RNA-dependent 
RNA polymerase NS5" + /protein_id="NP_739590.2" + 3'UTR 10273..10723 +ORIGIN + 1 agttgttagt ctacgtggac cgacaaagac agattctttg agggagctaa gctcaacgta + 61 gttctaacag ttttttaatt agagagcaga tctctgatga ataaccaacg gaaaaaggcg + 121 aaaaacacgc ctttcaatat gctgaaacgc gagagaaacc gcgtgtcgac tgtgcaacag + 181 ctgacaaaga gattctcact tggaatgctg cagggacgag gaccattaaa actgttcatg + 241 gccctggtgg cgttccttcg tttcctaaca atcccaccaa cagcagggat attgaagaga + 301 tggggaacaa ttaaaaaatc aaaagctatt aatgttttga gagggttcag gaaagagatt + 361 ggaaggatgc tgaacatctt gaataggaga cgcagatctg caggcatgat cattatgctg + 421 attccaacag tgatggcgtt ccatttaacc acacgtaacg gagaaccaca catgatcgtc + 481 agcagacaag agaaagggaa aagtcttctg tttaaaacag aggatggcgt gaacatgtgt + 541 accctcatgg ccatggacct tggtgaattg tgtgaagaca caatcacgta caagtgtccc + 601 cttctcaggc agaatgagcc agaagacata gactgttggt gcaactctac gtccacgtgg + 661 gtaacttatg ggacgtgtac caccatggga gaacatagaa gagaaaaaag atcagtggca + 721 ctcgttccac atgtgggaat gggactggag acacgaactg aaacatggat gtcatcagaa + 781 ggggcctgga aacatgtcca gagaattgaa acttggatct tgagacatcc aggcttcacc + 841 atgatggcag caatcctggc atacaccata ggaacgacac atttccaaag agccctgatt + 901 ttcatcttac tgacagctgt cactccttca atgacaatgc gttgcatagg aatgtcaaat + 961 agagactttg tggaaggggt ttcaggagga agctgggttg acatagtctt agaacatgga + 1021 agctgtgtga cgacgatggc aaaaaacaaa ccaacattgg attttgaact gataaaaaca + 1081 gaagccaaac agcctgccac cctaaggaag tactgtatag aggcaaagct aaccaacaca + 1141 acaacagaat ctcgctgccc aacacaaggg gaacccagcc taaatgaaga gcaggacaaa + 1201 aggttcgtct gcaaacactc catggtagac agaggatggg gaaatggatg tggactattt + 1261 ggaaagggag gcattgtgac ctgtgctatg ttcagatgca aaaagaacat ggaaggaaaa + 1321 gttgtgcaac cagaaaactt ggaatacacc attgtgataa cacctcactc aggggaagag + 1381 catgcagtcg gaaatgacac aggaaaacat ggcaaggaaa tcaaaataac accacagagt + 1441 tccatcacag aagcagaatt gacaggttat ggcactgtca caatggagtg ctctccaaga + 1501 acgggcctcg acttcaatga gatggtgttg ctgcagatgg aaaataaagc ttggctggtg + 1561 cacaggcaat ggttcctaga cctgccgtta 
ccatggttgc ccggagcgga cacacaaggg + 1621 tcaaattgga tacagaaaga gacattggtc actttcaaaa atccccatgc gaagaaacag + 1681 gatgttgttg ttttaggatc ccaagaaggg gccatgcaca cagcacttac aggggccaca + 1741 gaaatccaaa tgtcatcagg aaacttactc ttcacaggac atctcaagtg caggctgaga + 1801 atggacaagc tacagctcaa aggaatgtca tactctatgt gcacaggaaa gtttaaagtt + 1861 gtgaaggaaa tagcagaaac acaacatgga acaatagtta tcagagtgca atatgaaggg + 1921 gacggctctc catgcaagat cccttttgag ataatggatt tggaaaaaag acatgtctta + 1981 ggtcgcctga ttacagtcaa cccaattgtg acagaaaaag atagcccagt caacatagaa + 2041 gcagaacctc cattcggaga cagctacatc atcataggag tagagccggg acaactgaag + 2101 ctcaactggt ttaagaaagg aagttctatc ggccaaatgt ttgagacaac aatgaggggg + 2161 gcgaagagaa tggccatttt aggtgacaca gcctgggatt ttggatcctt gggaggagtg + 2221 tttacatcta taggaaaggc tctccaccaa gtctttggag caatctatgg agctgccttc + 2281 agtggggttt catggactat gaaaatcctc ataggagtca ttatcacatg gataggaatg + 2341 aattcacgca gcacctcact gtctgtgaca ctagtattgg tgggaattgt gacactgtat + 2401 ttgggagtca tggtgcaggc cgatagtggt tgcgttgtga gctggaaaaa caaagaactg + 2461 aaatgtggca gtgggatttt catcacagac aacgtgcaca catggacaga acaatacaag + 2521 ttccaaccag aatccccttc aaaactagct tcagctatcc agaaagccca tgaagagggc + 2581 atttgtggaa tccgctcagt aacaagactg gagaatctga tgtggaaaca aataacacca + 2641 gaattgaatc acattctatc agaaaatgag gtgaagttaa ctattatgac aggagacatc + 2701 aaaggaatca tgcaggcagg aaaacgatct ctgcggcctc agcccactga gctgaagtat + 2761 tcatggaaaa catggggcaa agcaaaaatg ctctctacag agtctcataa ccagaccttt + 2821 ctcattgatg gccccgaaac agcagaatgc cccaacacaa atagagcttg gaattcgttg + 2881 gaagttgaag actatggctt tggagtattc accaccaata tatggctaaa attgaaagaa + 2941 aaacaggatg tattctgcga ctcaaaactc atgtcagcgg ccataaaaga caacagagcc + 3001 gtccatgccg atatgggtta ttggatagaa agtgcactca atgacacatg gaagatagag + 3061 aaagcctctt tcattgaagt taaaaactgc cactggccaa aatcacacac cctctggagc + 3121 aatggagtgc tagaaagtga gatgataatt ccaaagaatc tcgctggacc agtgtctcaa + 3181 cacaactata gaccaggcta ccatacacaa ataacaggac catggcatct 
aggtaagctt + 3241 gagatggact ttgatttctg tgatggaaca acagtggtag tgactgagga ctgcggaaat + 3301 agaggaccct ctttgagaac aaccactgcc tctggaaaac tcataacaga atggtgctgc + 3361 cgatcttgca cattaccacc gctaagatac agaggtgagg atgggtgctg gtacgggatg + 3421 gaaatcagac cattgaagga gaaagaagag aatttggtca actccttggt cacagctgga + 3481 catgggcagg tcgacaactt ttcactagga gtcttgggaa tggcattgtt cctggaggaa + 3541 atgcttagga cccgagtagg aacgaaacat gcaatactac tagttgcagt ttcttttgtg + 3601 acattgatca cagggaacat gtcctttaga gacctgggaa gagtgatggt tatggtaggc + 3661 gccactatga cggatgacat aggtatgggc gtgacttatc ttgccctact agcagccttc + 3721 aaagtcagac caacttttgc agctggacta ctcttgagaa agctgacctc caaggaattg + 3781 atgatgacta ctataggaat tgtactcctc tcccagagca ccataccaga gaccattctt + 3841 gagttgactg atgcgttagc cttaggcatg atggtcctca aaatggtgag aaatatggaa + 3901 aagtatcaat tggcagtgac tatcatggct atcttgtgcg tcccaaacgc agtgatatta + 3961 caaaacgcat ggaaagtgag ttgcacaata ttggcagtgg tgtccgtttc cccactgctc + 4021 ttaacatcct cacagcaaaa aacagattgg ataccattag cattgacgat caaaggtctc + 4081 aatccaacag ctatttttct aacaaccctc tcaagaacca gcaagaaaag gagctggcca + 4141 ttaaatgagg ctatcatggc agtcgggatg gtgagcattt tagccagttc tctcctaaaa + 4201 aatgatattc ccatgacagg accattagtg gctggagggc tcctcactgt gtgctacgtg + 4261 ctcactggac gatcggccga tttggaactg gagagagcag ccgatgtcaa atgggaagac + 4321 caggcagaga tatcaggaag cagtccaatc ctgtcaataa caatatcaga agatggtagc + 4381 atgtcgataa aaaatgaaga ggaagaacaa acactgacca tactcattag aacaggattg + 4441 ctggtgatct caggactttt tcctgtatca ataccaatca cggcagcagc atggtacctg + 4501 tgggaagtga agaaacaacg ggccggagta ttgtgggatg ttccttcacc cccacccatg + 4561 ggaaaggctg aactggaaga tggagcctat agaattaagc aaaaagggat tcttggatat + 4621 tcccagatcg gagccggagt ttacaaagaa ggaacattcc atacaatgtg gcatgtcaca + 4681 cgtggcgctg ttctaatgca taaaggaaag aggattgaac catcatgggc ggacgtcaag + 4741 aaagacctaa tatcatatgg aggaggctgg aagttagaag gagaatggaa ggaaggagaa + 4801 gaagtccagg tattggcact ggagcctgga aaaaatccaa gagccgtcca aacgaaacct + 4861 ggtcttttca 
aaaccaacgc cggaacaata ggtgctgtat ctctggactt ttctcctgga + 4921 acgtcaggat ctccaattat cgacaaaaaa ggaaaagttg tgggtcttta tggtaatggt + 4981 gttgttacaa ggagtggagc atatgtgagt gctatagccc agactgaaaa aagcattgaa + 5041 gacaacccag agatcgaaga tgacattttc cgaaagagaa gactgaccat catggacctc + 5101 cacccaggag cgggaaagac gaagagatac cttccggcca tagtcagaga agctataaaa + 5161 cggggtttga gaacattaat cttggccccc actagagttg tggcagctga aatggaggaa + 5221 gcccttagag gacttccaat aagataccag accccagcca tcagagctga gcacaccggg + 5281 cgggagattg tggacctaat gtgtcatgcc acatttacca tgaggctgct atcaccagtt + 5341 agagtgccaa actacaacct gattatcatg gacgaagccc atttcacaga cccagcaagt + 5401 atagcagcta gaggatacat ctcaactcga gtggagatgg gtgaggcagc tgggattttt + 5461 atgacagcca ctcccccggg aagcagagac ccatttcctc agagcaatgc accaatcata + 5521 gatgaagaaa gagaaatccc tgaacgttcg tggaattccg gacatgaatg ggtcacggat + 5581 tttaaaggga agactgtttg gttcgttcca agtataaaag caggaaatga tatagcagct + 5641 tgcctgagga aaaatggaaa gaaagtgata caactcagta ggaagacctt tgattctgag + 5701 tatgtcaaga ctagaaccaa tgattgggac ttcgtggtta caactgacat ttcagaaatg + 5761 ggtgccaatt tcaaggctga gagggttata gaccccagac gctgcatgaa accagtcata + 5821 ctaacagatg gtgaagagcg ggtgattctg gcaggaccta tgccagtgac ccactctagt + 5881 gcagcacaaa gaagagggag aataggaaga aatccaaaaa atgagaatga ccagtacata + 5941 tacatggggg aacctctgga aaatgatgaa gactgtgcac actggaaaga agctaaaatg + 6001 ctcctagata acatcaacac gccagaagga atcattccta gcatgttcga accagagcgt + 6061 gaaaaggtgg atgccattga tggcgaatac cgcttgagag gagaagcaag gaaaaccttt + 6121 gtagacttaa tgagaagagg agacctacca gtctggttgg cctacagagt ggcagctgaa + 6181 ggcatcaact acgcagacag aaggtggtgt tttgatggag tcaagaacaa ccaaatccta + 6241 gaagaaaacg tggaagttga aatctggaca aaagaagggg aaaggaagaa attgaaaccc + 6301 agatggttgg atgctaggat ctattctgac ccactggcgc taaaagaatt taaggaattt + 6361 gcagccggaa gaaagtctct gaccctgaac ctaatcacag aaatgggtag gctcccaacc + 6421 ttcatgactc agaaggcaag agacgcactg gacaacttag cagtgctgca cacggctgag + 6481 gcaggtggaa gggcgtacaa ccatgctctc 
agtgaactgc cggagaccct ggagacattg + 6541 cttttactga cacttctggc tacagtcacg ggagggatct ttttattctt gatgagcgga + 6601 aggggcatag ggaagatgac cctgggaatg tgctgcataa tcacggctag catcctccta + 6661 tggtacgcac aaatacagcc acactggata gcagcttcaa taatactgga gttttttctc + 6721 atagttttgc ttattccaga acctgaaaaa cagagaacac cccaagacaa ccaactgacc + 6781 tacgttgtca tagccatcct cacagtggtg gccgcaacca tggcaaacga gatgggtttc + 6841 ctagaaaaaa cgaagaaaga tctcggattg ggaagcattg caacccagca acccgagagc + 6901 aacatcctgg acatagatct acgtcctgca tcagcatgga cgctgtatgc cgtggccaca + 6961 acatttgtta caccaatgtt gagacatagc attgaaaatt cctcagtgaa tgtgtcccta + 7021 acagctatag ccaaccaagc cacagtgtta atgggtctcg ggaaaggatg gccattgtca + 7081 aagatggaca tcggagttcc ccttctcgcc attggatgct actcacaagt caaccccata + 7141 actctcacag cagctctttt cttattggta gcacattatg ccatcatagg gccaggactc + 7201 caagcaaaag caaccagaga agctcagaaa agagcagcgg cgggcatcat gaaaaaccca + 7261 actgtcgatg gaataacagt gattgaccta gatccaatac cttatgatcc aaagtttgaa + 7321 aagcagttgg gacaagtaat gctcctagtc ctctgcgtga ctcaagtatt gatgatgagg + 7381 actacatggg ctctgtgtga ggctttaacc ttagctaccg ggcccatctc cacattgtgg + 7441 gaaggaaatc cagggaggtt ttggaacact accattgcgg tgtcaatggc taacattttt + 7501 agagggagtt acttggccgg agctggactt ctcttttcta ttatgaagaa cacaaccaac + 7561 acaagaaggg gaactggcaa cataggagag acgcttggag agaaatggaa aagccgattg + 7621 aacgcattgg gaaaaagtga attccagatc tacaagaaaa gtggaatcca ggaagtggat + 7681 agaaccttag caaaagaagg cattaaaaga ggagaaacgg accatcacgc tgtgtcgcga + 7741 ggctcagcaa aactgagatg gttcgttgag agaaacatgg tcacaccaga agggaaagta + 7801 gtggacctcg gttgtggcag aggaggctgg tcatactatt gtggaggact aaagaatgta + 7861 agagaagtca aaggcctaac aaaaggagga ccaggacacg aagaacccat ccccatgtca + 7921 acatatgggt ggaatctagt gcgtcttcaa agtggagttg acgttttctt catcccgcca + 7981 gaaaagtgtg acacattatt gtgtgacata ggggagtcat caccaaatcc cacagtggaa + 8041 gcaggacgaa cactcagagt ccttaactta gtagaaaatt ggttgaacaa caacactcaa + 8101 ttttgcataa aggttctcaa cccatatatg ccctcagtca tagaaaaaat 
ggaagcacta + 8161 caaaggaaat atggaggagc cttagtgagg aatccactct cacgaaactc cacacatgag + 8221 atgtactggg tatccaatgc ttccgggaac atagtgtcat cagtgaacat gatttcaagg + 8281 atgttgatca acagatttac aatgagatac aagaaagcca cttacgagcc ggatgttgac + 8341 ctcggaagcg gaacccgtaa catcgggatt gaaagtgaga taccaaacct agatataatt + 8401 gggaaaagaa tagaaaaaat aaagcaagag catgaaacat catggcacta tgaccaagac + 8461 cacccataca aaacgtgggc ataccatggt agctatgaaa caaaacagac tggatcagca + 8521 tcatccatgg tcaacggagt ggtcaggctg ctgacaaaac cttgggacgt cgtccccatg + 8581 gtgacacaga tggcaatgac agacacgact ccatttggac aacagcgcgt ttttaaagag + 8641 aaagtggaca cgagaaccca agaaccgaaa gaaggcacga agaaactaat gaaaataaca + 8701 gcagagtggc tttggaaaga attagggaag aaaaagacac ccaggatgtg caccagagaa + 8761 gaattcacaa gaaaggtgag aagcaatgca gccttggggg ccatattcac tgatgagaac + 8821 aagtggaagt cggcacgtga ggctgttgaa gatagtaggt tttgggagct ggttgacaag + 8881 gaaaggaatc tccatcttga aggaaagtgt gaaacatgtg tgtacaacat gatgggaaaa + 8941 agagagaaga agctagggga attcggcaag gcaaaaggca gcagagccat atggtacatg + 9001 tggcttggag cacgcttctt agagtttgaa gccctaggat tcttaaatga agatcactgg + 9061 ttctccagag agaactccct gagtggagtg gaaggagaag ggctgcacaa gctaggttac + 9121 attctaagag acgtgagcaa gaaagaggga ggagcaatgt atgccgatga caccgcagga + 9181 tgggatacaa gaatcacact agaagaccta aaaaatgaag aaatggtaac aaaccacatg + 9241 gaaggagaac acaagaaact agccgaggcc attttcaaac taacgtacca aaacaaggtg + 9301 gtgcgtgtgc aaagaccaac accaagaggc acagtaatgg acatcatatc gagaagagac + 9361 caaagaggta gtggacaagt tggcacctat ggactcaata ctttcaccaa tatggaagcc + 9421 caactaatca gacagatgga gggagaagga gtctttaaaa gcattcagca cctaacaatc + 9481 acagaagaaa tcgctgtgca aaactggtta gcaagagtgg ggcgcgaaag gttatcaaga + 9541 atggccatca gtggagatga ttgtgttgtg aaacctttag atgacaggtt cgcaagcgct + 9601 ttaacagctc taaatgacat gggaaagatt aggaaagaca tacaacaatg ggaaccttca + 9661 agaggatgga atgattggac acaagtgccc ttctgttcac accatttcca tgagttaatc + 9721 atgaaagacg gtcgcgtact cgttgttcca tgtagaaacc aagatgaact gattggcaga + 9781 gcccgaatct 
cccaaggagc agggtggtct ttgcgggaga cggcctgttt ggggaagtct + 9841 tacgcccaaa tgtggagctt gatgtacttc cacagacgcg acctcaggct ggcggcaaat + 9901 gctatttgct cggcagtacc atcacattgg gttccaacaa gtcgaacaac ctggtccata + 9961 catgctaaac atgaatggat gacaacggaa gacatgctga cagtctggaa cagggtgtgg + 10021 attcaagaaa acccatggat ggaagacaaa actccagtgg aatcatggga ggaaatccca + 10081 tacttgggga aaagagaaga ccaatggtgc ggctcattga ttgggttaac aagcagggcc + 10141 acctgggcaa agaacatcca agcagcaata aatcaagtta gatcccttat aggcaatgaa + 10201 gaatacacag attacatgcc atccatgaaa agattcagaa gagaagagga agaagcagga + 10261 gttctgtggt agaaagcaaa actaacatga aacaaggcta gaagtcaggt cggattaagc + 10321 catagtacgg aaaaaactat gctacctgtg agccccgtcc aaggacgtta aaagaagtca + 10381 ggccatcata aatgccatag cttgagtaaa ctatgcagcc tgtagctcca cctgagaagg + 10441 tgtaaaaaat ccgggaggcc acaaaccatg gaagctgtac gcatggcgta gtggactagc + 10501 ggttagagga gacccctccc ttacaaatcg cagcaacaat gggggcccaa ggcgagatga + 10561 agctgtagtc tcgctggaag gactagaggt tagaggagac ccccccgaaa caaaaaacag + 10621 catattgacg ctgggaaaga ccagagatcc tgctgtctcc tcagcatcat tccaggcaca + 10681 gaacgccaga aaatggaatg gtgctgttga atcaacaggt tct +// diff --git a/config/reference_dengue_denv3.gb b/config/reference_dengue_denv3.gb new file mode 100644 index 00000000..a806ec6b --- /dev/null +++ b/config/reference_dengue_denv3.gb @@ -0,0 +1,283 @@ +LOCUS DENV3/SRI_LANKA/REFERENCE/2000 10707 bp DNA VRL 14-SEP-2015 +DEFINITION Dengue virus 3, complete genome. +ACCESSION NC_001475 +VERSION NC_001475.2 +DBLINK BioProject:PRJNA15598 +KEYWORDS RefSeq. +SOURCE Dengue virus 3 + ORGANISM Dengue virus 3 + Viruses; ssRNA viruses; ssRNA positive-strand viruses, no DNA stage; + Flaviviridae; Flavivirus; Dengue virus group. +REFERENCE 1 (bases 1 to 10707) + AUTHORS Peyrefitte,C.N., Couissinier-Paris,P., Mercier-Perennec,V., + Bessaud,M., Martial,J., Kenane,N., Durand,J.P. and Tolou,H.J. 
+ TITLE Genetic Characterization of Newly Reintroduced Dengue Virus Type 3 + in Martinique (French West Indies) + JOURNAL J. Clin. Microbiol. 41 (11), 5195-5198 (2003) + PUBMED 14605161 +REFERENCE 2 (bases 1 to 10707) + AUTHORS Peyrefitte,C., Mercier,V., Couissinier-Paris,P., Tock,F., Kenane,N., + Durand,J.-P. and Tolou,H. + TITLE Direct Submission + JOURNAL Submitted (23-APR-2002) Unite de Virologie Tropicale, IMTSSA, Parc + du Pharo, Marseille Armees, BDR 13998, France +REFERENCE 3 (bases 1 to 10707) + CONSRTM NCBI Genome Project + TITLE Direct Submission + JOURNAL Submitted (01-AUG-2000) National Center for Biotechnology + Information, NIH, Bethesda, MD 20894, USA +COMMENT VALIDATED REFSEQ: This record has undergone validation or + preliminary review. The reference sequence was derived from + AY099336. + On Dec 20, 2007 this sequence version replaced gi:9626683. + COMPLETENESS: full length. +FEATURES Location/Qualifiers + source 1..10707 + /country="Sri Lanka" + /db_xref="taxon:11069" + /isolate="D3/H/IMTSSA-SRI/2000/1266" + /mol_type="genomic RNA" + /organism="Dengue virus 3" + 5'UTR 1..94 + gene 95..10267 + /db_xref="GeneID:5075727" + /gene="flavivirus polyprotein gene" + CDS 95..436 + /gene="C" + /product="anchored capsid protein C" + /protein_id="YP_001531165.2" + CDS 437..934 + /gene="M" + /note="premembrane/membrane protein; protein preM" + /product="membrane glycoprotein precursor M" + /protein_id="YP_001531166.1" + CDS 437..709 + /gene="pr" + /note="peptide pr" + /product="protein pr" + /protein_id="YP_009164955.1" + CDS 710..934 + /gene="flavivirus polyprotein gene" + /note="protein M" + /product="membrane glycoprotein M" + /protein_id="YP_001531167.1" + CDS 935..2413 + /gene="E" + /note="protein E" + /product="envelope protein E" + /protein_id="YP_001531168.2" + CDS 2414..3469 + /gene="NS1" + /product="nonstructural protein NS1" + /protein_id="YP_001531169.2" + CDS 3470..4123 + /gene="NS2A" + /product="nonstructural protein NS2A" + 
/protein_id="YP_001531170.2" + CDS 4124..4513 + /gene="NS2B" + /product="nonstructural protein NS2B" + /protein_id="YP_001531171.3" + CDS 4514..6370 + /gene="NS3" + /product="nonstructural protein NS3" + /protein_id="YP_001531172.2" + CDS 6371..6751 + /gene="NS4A" + /product="nonstructural protein NS4A" + /protein_id="YP_001531173.2" + CDS 6752..6820 + /gene="2K" + /note="presumably plays a role in membrane rearrangement" + /product="protein 2K" + /protein_id="YP_001531174.2" + CDS 6821..7564 + /gene="NS4B" + /product="nonstructural protein NS4B" + /protein_id="YP_001531175.2" + CDS 7565..10264 + /gene="NS5" + /product="RNA-dependent RNA polymerase NS5" + /protein_id="YP_001531176.2" + 3'UTR 10268..10707 +ORIGIN + 1 agttgttagt ctacgtggac cgacaagaac agtttcgact cggaagcttg cttaacgtag + 61 tgctgacagt tttttattag agagcagatc tctgatgaac aaccaacgga agaagacggg + 121 aaaaccgtct atcaatatgc tgaaacgcgt gagaaaccgt gtgtcaactg gatcacagtt + 181 ggcgaagaga ttctcaaaag gactgctgaa cggccaggga ccaatgaaat tggttatggc + 241 gttcatagct ttcctcagat ttctagccat tccaccaaca gcaggagtct tggctagatg + 301 gggaaccttc aagaagtcgg gggccattaa ggtcctgaaa ggcttcaaga aggagatctc + 361 aaacatgctg agcataatca accaacggaa aaagacatcg ctctgtctca tgatgatatt + 421 gccagcagca cttgctttcc acttgacttc acgagatgga gagccgcgca tgattgtggg + 481 gaagaatgaa agaggtaaat ccctactttt taagacagcc tctggaatca acatgtgcac + 541 actcatagcc atggatttgg gagagatgtg tgatgacacg gtcacttaca aatgccccca + 601 cattaccgaa gtggaacctg aagacattga ctgctggtgc aaccttacat caacatgggt + 661 gacttatgga acgtgcaatc aagctggaga gcatagacgc gacaagagat cagtggcgtt + 721 agctccccat gtcggcatgg gactggacac acgcacccaa acctggatgt cggctgaagg + 781 agcttggaga caagtcgaga aggtagagac atgggccctt aggcacccag ggttcaccat + 841 actagcccta tttctcgccc attacatagg cacttccctg acccagaagg tggttatttt + 901 catattatta atgctggtca ccccatccat gacaatgaga tgtgtgggag taggaaacag + 961 agattttgtg gaagggctat caggagctac gtgggttgac gtggtgctcg agcacggggg + 1021 gtgtgtgact accatggcta agaacaagcc cacgctggat 
atagagcttc agaagaccga + 1081 ggccacccaa ctggcgaccc taaggaagct atgcattgag gggaaaatta ccaacataac + 1141 aactgactca agatgtccta cccaagggga agcggttttg cctgaggagc aggaccagaa + 1201 ctacgtgtgt aagcatacat acgtagacag aggttggggg aacggttgtg gtttgtttgg + 1261 caaaggaagc ttggtaacat gtgcgaaatt tcaatgcctg gaaccaatag agggaaaagt + 1321 ggtgcaatat gagaacctca aatacaccgt catcattaca gtgcacacag gagaccaaca + 1381 ccaggtggga aatgaaacgc aaggagtcac ggctgagata acacctcagg catcaaccac + 1441 tgaagccatc ttgcctgaat atggaaccct tgggctagaa tgctcaccac ggacaggttt + 1501 ggatttcaat gaaatgatct tactaacaat gaagaacaaa gcatggatgg tacatagaca + 1561 atggttcttt gacctacctc taccatgggc atcaggagct acaacagaaa caccaacctg + 1621 gaacaggaag gagcttcttg tgacattcaa aaacgcacat gcgaaaaaac aagaagtagt + 1681 tgtccttgga tcgcaagagg gagcaatgca taccgcactg acaggagcta cagaaatcca + 1741 aaactcagga ggcacaagca ttttcgcggg gcacttaaaa tgtagactta agatggacaa + 1801 attggaactc aaggggatga gctatgcaat gtgcacgaat acctttgtgt tgaagaaaga + 1861 agtctcagaa acgcagcacg ggacaatact cattaaggtt gagtacaaag gggaagatgc + 1921 accttgcaag attccctttt ccacagagga tggacaaggg aaagctcata atggcagact + 1981 gatcacagcc aaccctgtgg tgactaagaa ggaggagcct gtcaatattg aggctgaacc + 2041 tccttttggg gaaagcaata tagtaattgg aattggagac aacgccttga aaatcaactg + 2101 gtacaagaag gggagctcga ttgggaagat gttcgaggcc actgaaaggg gtgcaaggcg + 2161 catggccatc ttgggagaca cagcttggga ctttggatca gtgggtggtg ttctgaactc + 2221 attaggcaaa atggtgcacc aaatatttgg aagtgcttat acagccctgt tcagtggagt + 2281 ctcttgggtg atgaaaattg gaataggtgt cctcttgact tggatagggt tgaattcaaa + 2341 aaacacatcc atgtcatttt catgcattgc gataggaatc attacactct atctgggagc + 2401 tgtggtacaa gctgacatgg ggtgtgtcat aaactggaag ggcaaagaac tcaaatgtgg + 2461 aagcggaatt ttcgtcacca atgaggtcca tacctggaca gagcaataca aattccaagc + 2521 agactcccca aaaagattgg caacagccat tgcaggcgcc tgggagaatg gagtgtgtgg + 2581 aattaggtca acaaccagaa tggagaatct cttgtggaag caaatagcca atgaactgaa + 2641 ctacatatta tgggaaaaca atatcaaatt aacggtagtt gtgggcgata cacttggggt + 2701 
cttagagcaa gggaaaagaa cactaacacc acaacccatg gagctaaaat actcatggaa + 2761 aacgtgggga aaggcaaaaa tagtgacagc tgaaacacaa aattcctctt tcataataga + 2821 cgggccaaac acaccggagt gtccaagtgc ctcaagagca tggaatgtgt gggaggtgga + 2881 agattacggg ttcggagtct tcacaaccaa catatggctg aaactccgag aggtctacac + 2941 ccaactatgt gaccataggc taatgtcggc agctgtcaag gatgagaggg ccgtgcatgc + 3001 cgacatgggc tactggatag aaagccaaaa gaatggaagt tggaagctag aaaaagcatc + 3061 cctcatagag gtaaaaacct gcacatggcc aaaatcacac actctctgga ctaatggtgt + 3121 gctagagagt gacatgatca tcccaaagag tctagctggt cctatctcac aacacaacta + 3181 caggcccggg taccacaccc aaacggcagg accctggcac ttaggaaaat tggagctgga + 3241 cttcaactac tgtgaaggaa caacagttgt catcacagaa agctgtggga caagaggccc + 3301 atcattgaga acaacaacag tgtcagggaa gttgatacac gaatggtgtt gccgctcgtg + 3361 cacacttccc cccctgcgat acatgggaga agacggctgc tggtatggca tggaaatcag + 3421 acccatcagt gagaaagaag agaacatggt aaagtcttta gtctcagcgg gaagtggaaa + 3481 ggtggacaac ttcacaatgg gtgtcttgtg tttggcaatc ctctttgaag aggtgttgag + 3541 aggaaaattt gggaagaaac acatgattgc aggggttttc tttacgtttg tgctccttct + 3601 ctcagggcaa ataacatgga gagacatggc gcacacacta ataatgatcg ggtccaacgc + 3661 ctctgacagg atgggaatgg gcgtcaccta cctagctcta attgcaacat ttaaaatcca + 3721 gccattcttg gctttgggat ttttcctaag aaagctgaca tctagagaaa atttattgtt + 3781 aggagttggg ttggccatgg caacaacgtt acaactgcca gaggacattg aacaaatggc + 3841 aaatggagtc gctctggggc tcatggctct taaactgata acacaatttg aaacatacca + 3901 attgtggacg gcattagtct ccttaacgtg ttcaaacaca atttttacgt tgactgttgc + 3961 ctggagaaca gccactctga ttttggccgg agtttcgctt ttaccagtgt gccagtcttc + 4021 aagcatgagg aaaacagatt ggctcccaat gacagtggca gctatgggag ttccacccct + 4081 tccacttttt atttttagct tgaaagacac actcaaaagg agaagctggc cactgaatga + 4141 aggggtgatg gctgttgggc ttgtgagcat tctggccagt tctctcctta gaaatgatgt + 4201 gcccatggct ggaccattag tggccggggg cttgctgata gcgtgctacg tcataactgg + 4261 cacgtcagcg gacctcactg tagaaaaagc cccagatgta acatgggagg aagaggctga + 4321 gcagacagga gtgtcccaca 
acttaatgat cacagttgat gatgatggaa caatgagaat + 4381 aaaagatgat gagactgaga acatcctaac agtgctttta aaaacagcat tactaatagt + 4441 atcaggcatt tttccatact ccatacccgc aacattgttg gtctggcaca cttggcaaaa + 4501 acaaacccaa agatccggcg ttttatggga cgtacccagc cccccagaga cacagaaagc + 4561 agaactggaa gaaggggttt ataggatcaa acagcaagga atttttggga aaacccaagt + 4621 aggggttgga gtacagaaag aaggagtctt ccacaccatg tggcacgtca caagaggggc + 4681 agtgttgaca cataatggga aaagactgga accaaactgg gctagtgtga aaaaagatct + 4741 gatttcatat ggaggaggat ggagactgag cgcacaatgg caaaaggggg aggaggtgca + 4801 ggttattgcc gtagagccag ggaagaaccc aaagaacttt caaaccacgc caggcacttt + 4861 ccagactact acaggggaaa taggagcaat tgcactggat ttcaagcctg gaacttcagg + 4921 atctcctatc ataaatagag agggaaaggt agtgggactg tatggcaatg gagtggttac + 4981 aaagaatggt ggctatgtca gcggaatagc gcaaacaaat gcagaaccag atggaccgac + 5041 accagagttg gaagaagaga tgttcaaaaa gcgaaacctg accataatgg atcttcatcc + 5101 tgggtcagga aagacacgga aataccttcc agctattgtc agagaggcaa tcaagagacg + 5161 tttaagaacc ttaattttgg caccgacaag ggtggttgca gctgagatgg aagaagcatt + 5221 gaaagggctc ccaataaggt accaaacaac agcaacaaaa tctgaacaca caggaagaga + 5281 gattgttgat ctaatgtgcc acgcaacgtt cacaatgcgt ttgctgtcac cagttagggt + 5341 tccaaattac aacttgataa taatggatga ggcccatttc acagacccag ccagtatagc + 5401 ggctagaggg tacatatcaa ctcgtgttgg aatgggagag gcagccgcaa tcttcatgac + 5461 agcaacaccc cctggaacag ctgatgcctt tcctcagagc aacgctccaa ttcaagatga + 5521 agaaagggac ataccagaac gctcatggaa ttcaggcaat gaatggatta ccgacttcgc + 5581 tgggaaaacg gtgtggtttg tccctagcat taaagccgga aatgacatag caaactgctt + 5641 gcgaaaaaac gggaaaaaag tcattcaact tagtaggaag acttttgaca cagaatatca + 5701 gaagactaaa ctgaatgatt gggactttgt ggtgacaact gacatttcag aaatgggggc + 5761 caatttcaaa gcagatagag tgatcgaccc aagaagatgt ctcaaaccag tgatcttgac + 5821 agatggacca gagcgggtga tcctggccgg accaatgcca gtcaccgcgg cgagtgctgc + 5881 gcaaaggaga gggagagttg gcaggaaccc acaaaaagag aatgaccagt acatattcac + 5941 gggccagcct ctcaacaatg atgaagacca tgctcactgg 
acagaagcaa aaatgctgct + 6001 ggacaacatc aacacaccag aagggattat accagctctc tttgaaccag aaagggagaa + 6061 gtcagccgcc atagacggtg agtatcgcct gaagggtgag tccaggaaga ctttcgtgga + 6121 actcatgagg aggggtgacc ttccagtttg gttagcccat aaagtagcat cagaaggaat + 6181 caaatacaca gatagaaaat ggtgctttga tgggcaacgc aataatcaaa ttttagagga + 6241 gaacatggat gtggaaattt ggacaaagga aggagaaaag aaaaaattga gacctaggtg + 6301 gcttgatgcc cgcacttatt cagatccatt ggcactcaag gaattcaagg actttgcggc + 6361 tggcagaaag tcaatcgccc ttgatcttgt gacagaaata ggaagagtgc cttcacatct + 6421 agcccacaga acaagaaacg ctctggacaa tctggtgatg ctgcatacgt cagaagatgg + 6481 cggtagggct tacaggcatg cggtggagga actaccagaa acaatggaaa cactcctact + 6541 cttgggacta atgatcttgt tgacaggtgg agcaatgctt ttcttgatat caggtaaagg + 6601 gattggaaag acttcaatag gactcatttg tgtaatcgct tccagcggca tgttgtggat + 6661 ggccgaagtt ccactccaat ggatcgcgtc ggctatagtc ctggagtttt ttatgatggt + 6721 gttgctcata ccagaaccag aaaagcagag aaccccccaa gacaaccaac tcgcatatgt + 6781 cgtgataggc atacttacat tggctgcaac aatagcagcc aatgaaatgg gactgctgga + 6841 aaccacaaag agagacttag gaatgtctaa ggagccaggt gttgtttctc caaccagcta + 6901 tttggatgtg gacttgcacc cagcatcagc ctggacattg tacgccgtgg ccactacagt + 6961 aataacacca atgttaagac ataccataga gaattctaca gcaaatgtgt ccctggcagc + 7021 tatagccaac caggcagtgg tcctgatggg tttggacaaa ggatggccaa tatcaaaaat + 7081 ggacttaggc gtgccactac tggcactggg ttgctattca caagtgaacc cactgactct + 7141 aactgcggca gtacttttgc taatcacaca ttatgctatc ataggtccag gattgcaagc + 7201 aaaagccacc cgtgaagctc agaaaaggac agctgctgga ataatgaaga atccaacagt + 7261 ggatgggata atgacaatag acctagattc tgtaatattt gattcaaaat ttgaaaaaca + 7321 actgggacag gttatgctcc tggttttgtg cgcagtccaa ctcttgctaa tgagaacatc + 7381 atgggccttg tgtgaagctt taactctagc tacaggacca ataacaacac tctgggaagg + 7441 atcacctggt aagttctgga acaccacgat agctgtttcc atggcgaaca tttttagagg + 7501 gagctattta gcaggagctg ggcttgcttt ttctattatg aaatcagttg gaacaggaaa + 7561 aagaggaaca ggctcacaag gtgaaacttt aggagaaaaa tggaaaaaga aattaaatca + 7621 
attatcccgg aaagagtttg acctttacaa gaaatctgga atcactgaag tggatagaac + 7681 agaagccaaa gaagggttga aaagaggaga gacaacacat catgccgtgt cccgaggtag + 7741 cgcaaaactt caatggtttg tggaaagaaa catggtcgtt cccgaaggaa gagtcataga + 7801 cttgggctgt ggaagaggag gctggtcata ttactgtgca ggactgaaaa aagtcacaga + 7861 agtgcgagga tacacaaaag gcggtccagg acacgaagaa ccagtaccta tgtctacata + 7921 tggatggaac atagttaagt taatgagcgg aaaggatgtg ttctatctcc cacctgaaaa + 7981 gtgtgatacc ctgttgtgtg acattggaga atcttcacca agcccaacag tggaagagag + 8041 cagaactata agagttttga agatggttga accatggcta aaaaacaacc agttttgcat + 8101 taaagttttg aacccttaca tgccaactgt gattgagcac ctagaaagac tacaaaggaa + 8161 acatggagga atgcttgtga gaaatccact ttcacgaaac tccacgcacg aaatgtactg + 8221 gatatctaat ggcacaggta acattgtctc ttcagtcaac atggtgtcta gattgctact + 8281 gaacaggttc acgatgacac acaggagacc caccatagag aaagatgtgg atttaggagc + 8341 aggaactcga catgttaatg cggaaccaga aacacccaac atggatgtca ttggggaaag + 8401 aataaaaagg atcaaggagg agcataattc aacatggcac tatgatgacg aaaaccccta + 8461 caaaacgtgg gcttaccatg gatcctatga agtcaaagcc acaggctcag cctcctccat + 8521 gataaatgga gtcgtgaaac tcctcaccaa accatgggat gtggtgccca tggtgacaca + 8581 gatggcaatg acagacacaa ctccatttgg ccagcagaga gtctttaaag agaaagtgga + 8641 caccaggacg cccaggccca tgccagggac aagaaaggct atggagatca cagcggagtg + 8701 gctctggaga accctgggaa ggaacaaaag acccagatta tgcacaaggg aagagtttac + 8761 aaaaaaggtc agaactaacg cagccatggg cgccgttttc acagaggaga accaatggga + 8821 cagtgcgaaa gctgctgttg aggatgaaga attttggaaa cttgtggaca gagaacgtga + 8881 actccacaaa ttgggcaaat gtggaagctg cgtttataac atgatgggca agagagagaa + 8941 aaaacttgga gagtttggca aagcaaaagg cagtagagct atatggtaca tgtggttggg + 9001 agccaggtac cttgagttcg aagcccttgg attcttaaat gaagaccact ggttctcgcg + 9061 tgaaaactct tacagtggag tagaaggaga aggactgcac aagctaggct acatattaag + 9121 ggacatttcc aagatacccg gaggagccat gtatgctgat gacacagctg gttgggacac + 9181 aagaataaca gaagatgacc tgcacaatga ggaaaagatc atacagcaaa tggaccctga + 9241 acacaggcag ttagcgaacg 
ctatattcaa gctcacatac caaaacaaag tggtcaaagt + 9301 tcaacgaccg actccaacgg gcacggtaat ggatattata tctaggaaag accaaagggg + 9361 cagtggacaa ctgggaactt atggcctgaa tacattcacc aacatggaag cccagttagt + 9421 cagacaaatg gaaggagaag gtgtgctgac aaaggcagac ctcgagaacc ctcatctgct + 9481 agagaagaaa atcacacaat ggttggaaac caaaggagtg gagaggttaa aaagaatggc + 9541 cattagcggg gatgattgcg tggtgaaacc aatcgatgac aggttcgcta atgccctgct + 9601 tgctttgaac gatatgggaa aggttcggaa agacatacct caatggcagc catcaaaggg + 9661 atggcatgat tggcaacagg ttcctttctg ctcccaccac tttcatgaat tgatcatgaa + 9721 agatggaaga aagttggtgg ttccctgcag accccaggac gaactaatag gaagagcaag + 9781 aatctctcaa ggagcgggat ggagccttag agaaactgca tgtctgggga aagcctacgc + 9841 ccaaatgtgg agtctcatgt attttcacag aagagatctc agattagcat ccaacgccat + 9901 atgttcagca gtaccagtcc actgggttcc cacaagtaga acgacatggt ctattcatgc + 9961 tcaccatcag tggatgacta cagaagacat gcttactgtt tggaacaggg tgtggataga + 10021 ggaaaatcca tggatggaag acaaaactcc agttacaact tgggaaaatg ttccatatct + 10081 aggaaagaga gaagaccaat ggtgtggatc acttattggt ctcacttcca gagcaacctg + 10141 ggcccagaac atacccacag caattcaaca ggtgagaagc cttataggca atgaagagtt + 10201 cctggactac atgccttcaa tgaagagatt caggaaggaa gaggagtcgg agggagccat + 10261 ttggtaaacg taggaagtgg aaaagaggct aactgtcagg ccaccttaag ccacagtacg + 10321 gaagaagctg tgctgcctgt gagccccgtc caaggacgtt aaaagaagaa gtcaggcccc + 10381 aaagccacgg tttgagcaaa ccgtgctgcc tgtagctccg tcgtggggac gtaaaacctg + 10441 ggaggctgca aactgtggaa gctgtacgca cggtgtagca gactagcggt tagaggagac + 10501 ccctcccatg acacaacgca gcagcggggc ccgagcactg agggaagctg tacctccttg + 10561 caaaggacta gaggttagag gagacccccc gcaaataaaa acagcatatt gacgctggga + 10621 gagaccagag atcctgctgt ctcctcagca tcattccagg cacagaacgc cagaaaatgg + 10681 aatggtgctg ttgaatcaac aggttct +// diff --git a/config/reference_dengue_denv4.gb b/config/reference_dengue_denv4.gb new file mode 100644 index 00000000..f0bc91f7 --- /dev/null +++ b/config/reference_dengue_denv4.gb @@ -0,0 +1,275 @@ +LOCUS 
DENV4/NA/REFERENCE/2003 10649 bp DNA VRL 11-FEB-2016 +DEFINITION Dengue virus 4, complete genome. +ACCESSION NC_002640 +VERSION NC_002640.1 +DBLINK BioProject:PRJNA15599 +KEYWORDS RefSeq. +SOURCE Dengue virus 4 + ORGANISM Dengue virus 4 + Viruses; ssRNA viruses; ssRNA positive-strand viruses, no DNA stage; + Flaviviridae; Flavivirus; Dengue virus group. +REFERENCE 1 (bases 1 to 10649) + AUTHORS Durbin,A.P., Karron,R.A., Sun,W., Vaughn,D.W., Reynolds,M.J., + Perreault,J.R., Men,R.H., Lai,C.J., Elkins,W.R., Chanock,R.M., + Murphy,B.R. and Whitehead,S.S. + TITLE A live attenuated dengue virus type 4 vaccine candidate with a 30 + nucleotide deletion in the 3' untranslated region is highly + attenuated and immunogenic in humans + JOURNAL Unpublished +REFERENCE 2 (bases 1 to 10649) + CONSRTM NCBI Genome Project + TITLE Direct Submission + JOURNAL Submitted (12-JAN-2001) National Center for Biotechnology + Information, NIH, Bethesda, MD 20894, USA +REFERENCE 3 (bases 1 to 10649) + AUTHORS Whitehead,S.S. + TITLE Direct Submission + JOURNAL Submitted (08-DEC-2000) LID, NIAID, 7 Center Drive, Bethesda, MD + 20892, USA +COMMENT PROVISIONAL REFSEQ: This record has not yet been subject to final + NCBI review. The reference sequence was derived from AF326825. + COMPLETENESS: full length. 
+FEATURES Location/Qualifiers + source 1..10649 + /clone="rDEN4" + /db_xref="taxon:11070" + /mol_type="genomic RNA" + /organism="Dengue virus 4" + 5'UTR 1..101 + gene 102..10265 + /db_xref="GeneID:5075729" + /gene="flavivirus polyprotein gene" + CDS 102..440 + /gene="C" + /product="anchored capsid protein C" + /protein_id="NP_740314.1" + CDS 441..938 + /gene="M" + /product="membrane glycoprotein precursor M" + /protein_id="NP_740315.1" + CDS 441..713 + /gene="pr" + /note="peptide pr" + /product="protein pr" + /protein_id="YP_009164957.1" + CDS 714..938 + /gene="flavivirus polyprotein gene" + /product="membrane glycoprotein M" + /protein_id="NP_740316.1" + CDS 939..2423 + /gene="E" + /product="envelope protein E" + /protein_id="NP_740317.1" + CDS 2424..3479 + /gene="NS1" + /product="nonstructural protein NS1" + /protein_id="NP_740318.1" + CDS 3480..4133 + /gene="NS2A" + /product="nonstructural protein NS2A" + /protein_id="NP_740319.1" + CDS 4134..4523 + /gene="NS2B" + /product="nonstructural protein NS2B" + /protein_id="NP_740320.1" + CDS 4524..6377 + /gene="NS3" + /product="nonstructural protein NS3" + /protein_id="NP_740321.1" + CDS 6378..6758 + /gene="NS4A" + /product="nonstructural protein NS4A" + /protein_id="NP_740322.1" + CDS 6759..6827 + /gene="2K" + /product="protein 2K" + /protein_id="NP_740323.1" + CDS 6828..7562 + /gene="NS4B" + /product="nonstructural protein NS4B" + /protein_id="NP_740324.1" + CDS 7563..10262 + /gene="NS5" + /product="RNA-dependent RNA polymerase NS5" + /protein_id="NP_740325.1" + 3'UTR 10266..10649 +ORIGIN + 1 agttgttagt ctgtgtggac cgacaaggac agttccaaat cggaagcttg cttaacacag + 61 ttctaacagt ttgtttgaat agagagcaga tctctggaaa aatgaaccaa cgaaaaaagg + 121 tggttagacc acctttcaat atgctgaaac gcgagagaaa ccgcgtatca acccctcaag + 181 ggttggtgaa gagattctca accggacttt tttctgggaa aggaccctta cggatggtgc + 241 tagcattcat cacgtttttg cgagtccttt ccatcccacc aacagcaggg attctgaaga + 301 gatggggaca gttgaagaaa aataaggcca tcaagatact gattggattc aggaaggaga + 361 
taggccgcat gctgaacatc ttgaacggga gaaaaaggtc aacgataaca ttgctgtgct + 421 tgattcccac cgtaatggcg ttttccctca gcacaagaga tggcgaaccc ctcatgatag + 481 tggcaaaaca tgaaaggggg agacctctct tgtttaagac aacagagggg atcaacaaat + 541 gcactctcat tgccatggac ttgggtgaaa tgtgtgagga cactgtcacg tataaatgcc + 601 ccctactggt caataccgaa cctgaagaca ttgattgctg gtgcaacctc acgtctacct + 661 gggtcatgta tgggacatgc acccagagcg gagaacggag acgagagaag cgctcagtag + 721 ctttaacacc acattcagga atgggattgg aaacaagagc tgagacatgg atgtcatcgg + 781 aaggggcttg gaagcatgct cagagagtag agagctggat actcagaaac ccaggattcg + 841 cgctcttggc aggatttatg gcttatatga ttgggcaaac aggaatccag cgaactgtct + 901 tctttgtcct aatgatgctg gtcgccccat cctacggaat gcgatgcgta ggagtaggaa + 961 acagagactt tgtggaagga gtctcaggtg gagcatgggt cgacctggtg ctagaacatg + 1021 gaggatgcgt cacaaccatg gcccagggaa aaccaacctt ggattttgaa ctgactaaga + 1081 caacagccaa ggaagtggct ctgttaagaa cctattgcat tgaagcctca atatcaaaca + 1141 taactacggc aacaagatgt ccaacgcaag gagagcctta tctgaaagag gaacaggacc + 1201 aacagtacat ttgccggaga gatgtggtag acagagggtg gggcaatggc tgtggcttgt + 1261 ttggaaaagg aggagttgtg acatgtgcga agttttcatg ttcggggaag ataacaggca + 1321 atttggtcca aattgagaac cttgaataca cagtggttgt aacagtccac aatggagaca + 1381 cccatgcagt aggaaatgac acatccaatc atggagttac agccatgata actcccaggt + 1441 caccatcggt ggaagtcaaa ttgccggact atggagaact aacactcgat tgtgaaccca + 1501 ggtctggaat tgactttaat gagatgattc tgatgaaaat gaaaaagaaa acatggctcg + 1561 tgcataagca atggtttttg gatctgcctc ttccatggac agcaggagca gacacatcag + 1621 aggttcactg gaattacaaa gagagaatgg tgacatttaa ggttcctcat gccaagagac + 1681 aggatgtgac agtgctggga tctcaggaag gagccatgca ttctgccctc gctggagcca + 1741 cagaagtgga ctccggtgat ggaaatcaca tgtttgcagg acatcttaag tgcaaagtcc + 1801 gtatggagaa attgagaatc aagggaatgt catacacgat gtgttcagga aagttttcaa + 1861 ttgacaaaga gatggcagaa acacagcatg ggacaacagt ggtgaaagtc aagtatgaag + 1921 gtgctggagc tccgtgtaaa gtccccatag agataagaga tgtaaacaag gaaaaagtgg + 1981 ttgggcgtat catctcatcc acccctttgg 
ctgagaatac caacagtgta accaacatag + 2041 aattagaacc cccctttggg gacagctaca tagtgatagg tgttggaaac agcgcattaa + 2101 cactccattg gttcaggaaa gggagttcca ttggcaagat gtttgagtcc acatacagag + 2161 gtgcaaaacg aatggccatt ctaggtgaaa cagcttggga ttttggttcc gttggtggac + 2221 tgttcacatc attgggaaag gctgtgcacc aggtttttgg aagtgtgtat acaaccatgt + 2281 ttggaggagt ctcatggatg attagaatcc taattgggtt cttagtgttg tggattggca + 2341 cgaactcgag gaacacttca atggctatga cgtgcatagc tgttggagga atcactctgt + 2401 ttctgggctt cacagttcaa gcagacatgg gttgtgtggc gtcatggagt gggaaagaat + 2461 tgaagtgtgg aagcggaatt tttgtggttg acaacgtgca cacttggaca gaacagtaca + 2521 aatttcaacc agagtcccca gcgagactag cgtctgcaat attaaatgcc cacaaagatg + 2581 gggtctgtgg aattagatca accacgaggc tggaaaatgt catgtggaag caaataacca + 2641 acgagctaaa ctatgttctc tgggaaggag gacatgacct cactgtagtg gctggggatg + 2701 tgaagggggt gttgaccaaa ggcaagagag cactcacacc cccagtgagt gatctgaaat + 2761 attcatggaa gacatgggga aaagcaaaaa tcttcacccc agaagcaaga aatagcacat + 2821 ttttaataga cggaccagac acctctgaat gccccaatga acgaagagca tggaactctc + 2881 ttgaggtgga agactatgga tttggcatgt tcacgaccaa catatggatg aaattccgag + 2941 aaggaagttc agaagtgtgt gaccacaggt taatgtcagc tgcaattaaa gatcagaaag + 3001 ctgtgcatgc tgacatgggt tattggatag agagctcaaa aaaccagacc tggcagatag + 3061 agaaagcatc tcttattgaa gtgaaaacat gtctgtggcc caagacccac acactgtgga + 3121 gcaatggagt gctggaaagc cagatgctca ttccaaaatc atatgcgggc cctttttcac + 3181 agcacaatta ccgccagggc tatgccacgc aaaccgtggg cccatggcac ttaggcaaat + 3241 tagagataga ctttggagaa tgccccggaa caacagtcac aattcaggag gattgtgacc + 3301 atagaggccc atctttgagg accaccactg catctggaaa actagtcacg caatggtgct + 3361 gccgctcctg cacgatgcct cccttaaggt tcttgggaga agatgggtgc tggtatggga + 3421 tggagattag gcccttgagt gaaaaagaag agaacatggt caaatcacag gtgacggccg + 3481 gacagggcac atcagaaact ttttctatgg gtctgttgtg cctgaccttg tttgtggaag + 3541 aatgcttgag gagaagagtc actaggaaac acatgatatt agttgtggtg atcactcttt + 3601 gtgctatcat cctgggaggc ctcacatgga tggacttact acgagccctc 
atcatgttgg + 3661 gggacactat gtctggtaga ataggaggac agatccacct agccatcatg gcagtgttca + 3721 agatgtcacc aggatacgtg ctgggtgtgt ttttaaggaa actcacttca agagagacag + 3781 cactaatggt aataggaatg gccatgacaa cggtgctttc aattccacat gaccttatgg + 3841 aactcattga tggaatatca ctgggactaa ttttgctaaa aatagtaaca cagtttgaca + 3901 acacccaagt gggaacctta gctctttcct tgactttcat aagatcaaca atgccattgg + 3961 tcatggcttg gaggaccatt atggctgtgt tgtttgtggt cacactcatt cctttgtgca + 4021 ggacaagctg tcttcaaaaa cagtctcatt gggtagaaat aacagcactc atcctaggag + 4081 cccaagctct gccagtgtac ctaatgactc ttatgaaagg agcctcaaga agatcttggc + 4141 ctcttaacga gggcataatg gctgtgggtt tggttagtct cttaggaagc gctcttttaa + 4201 agaatgatgt ccctttagct ggcccaatgg tggcaggagg cttacttctg gcggcttacg + 4261 tgatgagtgg tagctcagca gatctgtcac tagagaaggc cgccaacgtg cagtgggatg + 4321 aaatggcaga cataacaggc tcaagcccaa tcgtagaagt gaagcaggat gaagatggct + 4381 ctttctccat acgggacgtc gaggaaacca atatgataac ccttttggtg aaactggcac + 4441 tgataacagt gtcaggtctc taccccttgg caattccagt cacaatgacc ttatggtaca + 4501 tgtggcaagt gaaaacacaa agatcaggag ccctgtggga cgtcccctca cccgctgcca + 4561 ctaaaaaagc cgcactgtct gaaggagtgt acaggatcat gcaaagaggg ttattcggga + 4621 aaactcaggt tggagtaggg atacacatgg aaggtgtatt tcacacaatg tggcatgtaa + 4681 caagaggatc agtgatctgc cacgagactg ggagattgga gccatcttgg gctgacgtca + 4741 ggaatgacat gatatcatac ggtgggggat ggaggcttgg agacaaatgg gacaaagaag + 4801 aagacgttca ggtcctcgcc atagaaccag gaaaaaatcc taaacatgtc caaacgaaac + 4861 ctggcctttt caagacccta actggagaaa ttggagcagt aacattagat ttcaaacccg + 4921 gaacgtctgg ttctcccatc atcaacagga aaggaaaagt catcggactc tatggaaatg + 4981 gagtagttac caaatcaggt gattacgtca gtgccataac gcaagccgaa agaattggag + 5041 agccagatta tgaagtggat gaggacattt ttcgaaagaa aagattaact ataatggact + 5101 tacaccccgg agctggaaag acaaaaagaa ttcttccatc aatagtgaga gaagccttaa + 5161 aaaggaggct acgaactttg attttagctc ccacgagagt ggtggcggcc gagatggaag + 5221 aggccctacg tggactgcca atccgttatc agaccccagc tgtgaaatca gaacacacag + 5281 gaagagagat 
tgtagacctc atgtgtcatg caaccttcac aacaagactt ttgtcatcaa + 5341 ccagggttcc aaattacaac cttatagtga tggatgaagc acatttcacc gatccttcta + 5401 gtgtcgcggc tagaggatac atctcgacca gggtggaaat gggagaggca gcagccatct + 5461 tcatgaccgc aacccctccc ggagcgacag atccctttcc ccagagcaac agcccaatag + 5521 aagacatcga gagggaaatt ccggaaaggt catggaacac agggttcgac tggataacag + 5581 actaccaagg gaaaactgtg tggtttgttc ccagcataaa agctggaaat gacattgcaa + 5641 attgtttgag aaagtcggga aagaaagtta tccagttgag taggaaaacc tttgatacag + 5701 agtatccaaa aacgaaactc acggactggg actttgtggt cactacagac atatctgaaa + 5761 tgggggccaa ttttagagcc gggagagtga tagaccctag aagatgcctc aagccagtta + 5821 tcctaccaga tgggccagag agagtcattt tagcaggtcc tattccagtg actccagcaa + 5881 gcgctgctca gagaagaggg cgaataggaa ggaacccagc acaagaagac gaccaatacg + 5941 ttttctccgg agacccacta aaaaatgatg aagatcatgc ccactggaca gaagcaaaga + 6001 tgctgcttga caatatctac accccagaag ggatcattcc aacattgttt ggtccggaaa + 6061 gggaaaaaac ccaagccatt gatggagagt ttcgcctcag aggggaacaa aggaagactt + 6121 ttgtggaatt aatgaggaga ggagaccttc cggtgtggct gagctataag gtagcttctg + 6181 ctggcatttc ttacgaagat cgggaatggt gcttcacagg ggaaagaaat aaccaaattt + 6241 tagaagaaaa catggaggtt gaaatttgga ctagagaggg agaaaagaaa aagctaaggc + 6301 caagatggtt agatgcacgt gtatacgctg accccatggc tttgaaggat ttcaaggagt + 6361 ttgccagtgg aaggaagagt ataactctcg acatcctaac agagattgcc agtttgccaa + 6421 cttacctttc ctctagggcc aagctcgccc ttgataacat agtcatgctc cacacaacag + 6481 aaagaggagg gagggcctat caacacgccc tgaacgaact tccggagtca ctggaaacac + 6541 tcatgcttgt agctttacta ggtgctatga cagcaggcat cttcctgttt ttcatgcaag + 6601 ggaaaggaat agggaaattg tcaatgggtt tgataaccat tgcggtggct agtggcttgc + 6661 tctgggtagc agaaattcaa ccccagtgga tagcggcctc aatcatacta gagttttttc + 6721 tcatggtact gttgataccg gaaccagaaa aacaaaggac cccacaagac aatcaattga + 6781 tctacgtcat attgaccatt ctcaccatca ttggtctaat agcagccaac gagatggggc + 6841 tgattgaaaa aacaaaaacg gattttgggt tttaccaggt aaaaacagaa accaccatcc + 6901 tcgatgtgga cttgagacca gcttcagcat 
ggacgctcta tgcagtagcc accacaattc + 6961 tgactcccat gctgagacac accatagaaa acacgtcggc caacctatct ctagcagcca + 7021 ttgccaacca ggcagccgtc ctaatggggc ttggaaaagg atggccgctc cacagaatgg + 7081 acctcggtgt gccgctgtta gcaatgggat gctattctca agtgaaccca acaaccttga + 7141 cagcatcctt agtcatgctt ttagtccatt atgcaataat aggcccagga ttgcaggcaa + 7201 aagccacaag agaggcccag aaaaggacag ctgctgggat catgaaaaat cccacagtgg + 7261 acgggataac agtaatagat ctagaaccaa tatcctatga cccaaaattt gaaaagcaat + 7321 tagggcaggt catgctacta gtcttgtgtg ctggacaact actcttgatg agaacaacat + 7381 gggctttctg tgaagtcttg actttggcca caggaccaat cttgaccttg tgggagggca + 7441 acccgggaag gttttggaac acgaccatag ccgtatccac cgccaacatt ttcaggggaa + 7501 gttacttggc gggagctgga ctggcttttt cactcataaa gaatgcacaa acccctagga + 7561 ggggaactgg gaccacagga gagacactgg gagagaagtg gaagagacag ctaaactcat + 7621 tagacagaaa agagtttgaa gagtataaaa gaagtggaat actagaagtg gacaggactg + 7681 aagccaagtc tgccctgaaa gatgggtcta aaatcaagca tgcagtatca agagggtcca + 7741 gtaagatcag atggattgtt gagagaggga tggtaaagcc aaaagggaaa gttgtagatc + 7801 ttggctgtgg gagaggagga tggtcttatt acatggcgac actcaagaac gtgactgaag + 7861 tgaaagggta tacaaaagga ggtccaggac atgaagaacc gattcccatg gctacttatg + 7921 gttggaattt ggtcaaactc cattcagggg ttgacgtgtt ctacaaaccc acagagcaag + 7981 tggacaccct gctctgtgat attggggagt catcttctaa tccaacaata gaggaaggaa + 8041 gaacattaag agttttgaag atggtggagc catggctctc ttcaaaacct gaattctgca + 8101 tcaaagtcct taacccctac atgccaacag tcatagaaga gctggagaaa ctgcagagaa + 8161 aacatggtgg gaaccttgtc agatgcccgc tgtccaggaa ctccacccat gagatgtatt + 8221 gggtgtcagg agcgtcggga aacattgtga gctctgtgaa cacaacatca aagatgttgt + 8281 tgaacaggtt cacaacaagg cataggaaac ccacttatga gaaggacgta gatcttgggg + 8341 caggaacgag aagtgtctcc actgaaacag aaaaaccaga catgacaatc attgggagaa + 8401 ggcttcagcg attgcaagaa gagcacaaag aaacctggca ttatgatcag gaaaacccat + 8461 acagaacctg ggcgtatcat ggaagctatg aagctccttc gacaggctct gcatcctcca + 8521 tggtgaacgg ggtggtaaaa ctgctaacaa aaccctggga tgtgattcca 
atggtgactc + 8581 agttagccat gacagataca accccttttg ggcaacaaag agtgttcaaa gagaaggtgg + 8641 ataccagaac accacaacca aaacccggta cacgaatggt tatgaccacg acagccaatt + 8701 ggctgtgggc cctccttgga aagaagaaaa atcccagact gtgcacaagg gaagagttca + 8761 tctcaaaagt tagatcaaac gcagccatag gcgcagtctt tcaggaagaa cagggatgga + 8821 catcagccag tgaagctgtg aatgacagcc ggttttggga actggttgac aaagaaaggg + 8881 ccctacacca ggaagggaaa tgtgaatcgt gtgtctataa catgatggga aaacgtgaga + 8941 aaaagttagg agagtttggc agagccaagg gaagccgagc aatctggtac atgtggctgg + 9001 gagcgcggtt tctggaattt gaagccctgg gttttttgaa tgaagatcac tggtttggca + 9061 gagaaaattc atggagtgga gtggaagggg aaggtctgca cagattggga tatatcctgg + 9121 aggagataga caagaaggat ggagacctaa tgtatgctga tgacacagca ggctgggaca + 9181 caagaatcac tgaggatgac cttcaaaatg aggaactgat cacggaacag atggctcccc + 9241 accacaagat cctagccaaa gccattttca aactaaccta tcaaaacaaa gtggtgaaag + 9301 tcctcagacc cacaccgcgg ggagcggtga tggatatcat atccaggaaa gaccaaagag + 9361 gtagtggaca agttggaaca tatggtttga acacattcac caacatggaa gttcaactca + 9421 tccgccaaat ggaagctgaa ggagtcatca cacaagatga catgcagaac ccaaaagggt + 9481 tgaaagaaag agttgagaaa tggctgaaag agtgtggtgt cgacaggtta aagaggatgg + 9541 caatcagtgg agacgattgc gtggtgaagc ccctagatga gaggtttggc acttccctcc + 9601 tcttcttgaa cgacatggga aaggtgagga aagacattcc gcagtgggaa ccatctaagg + 9661 gatggaaaaa ctggcaagag gttccttttt gctcccacca ctttcacaag atctttatga + 9721 aggatggccg ctcactagtt gttccatgta gaaaccagga tgaactgata gggagagcca + 9781 gaatctcgca gggagctgga tggagcttaa gagaaacagc ctgcctgggc aaagcttacg + 9841 cccagatgtg gtcgcttatg tacttccaca gaagggatct gcgtttagcc tccatggcca + 9901 tatgctcagc agttccaacg gaatggtttc caacaagcag aacaacatgg tcaatccacg + 9961 ctcatcacca gtggatgacc actgaagata tgctcaaagt gtggaacaga gtgtggatag + 10021 aagacaaccc taatatgact gacaagactc cagtccattc gtgggaagat ataccttacc + 10081 tagggaaaag agaggatttg tggtgtggat ccctgattgg actttcttcc agagccacct + 10141 gggcgaagaa cattcatacg gccataaccc aggtcaggaa cctgatcgga aaagaggaat + 10201 
acgtggatta catgccagta atgaaaagat acagtgctcc ttcagagagt gaaggagttc + 10261 tgtaattacc aacaacaaac accaaaggct attgaagtca ggccacttgt gccacggttt + 10321 gagcaaaccg tgctgcctgt agctccgcca ataatgggag gcgtaataat ccccagggag + 10381 gccatgcgcc acggaagctg tacgcgtggc atattggact agcggttaga ggagacccct + 10441 cccatcactg ataaaacgca gcaaaagggg gcccgaagcc aggaggaagc tgtactcctg + 10501 gtggaaggac tagaggttag aggagacccc cccaacacaa aaacagcata ttgacgctgg + 10561 gaaagaccag agatcctgct gtctctgcaa catcaatcca ggcacagagc gccgcaagat + 10621 ggattggtgt tgttgatcca acaggttct +//