|
7 | 7 | import msprime
|
8 | 8 |
|
9 | 9 | import stdpopsim
|
| 10 | +from . import HomSap_genome_data |
10 | 11 |
|
11 | 12 | logger = logging.getLogger(__name__)
|
12 | 13 |
|
|
16 | 17 | #
|
17 | 18 | ###########################################################
|
18 | 19 |
|
19 |
| -# List of chromosomes. |
20 |
| - |
21 |
| -# FIXME: add mean mutation rate data to this table. |
22 |
| -# Name Length mean_recombination_rate mean_mutation_rate |
23 |
| - |
24 |
| -# length information can be found here |
25 |
| -# <http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/chromInfo.txt.gz> |
26 | 20 |
|
27 | 21 | # mean_recombination_rate was computed across all windows of the GRCh37 genetic map
|
28 | 22 | # <ftp://ftp-trace.ncbi.nih.gov/1000genomes/ftp/technical/working/20110106_recombination_hotspots>
|
29 |
| -_chromosome_data = """\ |
30 |
| -chr1 249250621 1.1485597641285933e-08 |
31 |
| -chr2 243199373 1.1054289277533446e-08 |
32 |
| -chr3 198022430 1.1279585624662551e-08 |
33 |
| -chr4 191154276 1.1231162636001008e-08 |
34 |
| -chr5 180915260 1.1280936570022824e-08 |
35 |
| -chr6 171115067 1.1222852661225285e-08 |
36 |
| -chr7 159138663 1.1764614397655721e-08 |
37 |
| -chr8 146364022 1.1478465778920576e-08 |
38 |
| -chr9 141213431 1.1780701596308656e-08 |
39 |
| -chr10 135534747 1.3365134257075317e-08 |
40 |
| -chr11 135006516 1.1719334320833283e-08 |
41 |
| -chr12 133851895 1.305017186986983e-08 |
42 |
| -chr13 115169878 1.0914860554958317e-08 |
43 |
| -chr14 107349540 1.119730771394731e-08 |
44 |
| -chr15 102531392 1.3835785893339787e-08 |
45 |
| -chr16 90354753 1.4834607113882717e-08 |
46 |
| -chr17 81195210 1.582489036239487e-08 |
47 |
| -chr18 78077248 1.5075956950023575e-08 |
48 |
| -chr19 59128983 1.8220141872466202e-08 |
49 |
| -chr20 63025520 1.7178269031631664e-08 |
50 |
| -chr21 48129895 1.3045214034879191e-08 |
51 |
| -chr22 51304566 1.4445022767788226e-08 |
52 |
| -chrX 155270560 1.164662223273842e-08 |
53 |
| -chrY 59373566 0.0 |
54 |
| -""" |
55 |
| - |
| 23 | +_recombination_rate_data = { |
| 24 | + "1": 1.1485597641285933e-08, |
| 25 | + "2": 1.1054289277533446e-08, |
| 26 | + "3": 1.1279585624662551e-08, |
| 27 | + "4": 1.1231162636001008e-08, |
| 28 | + "5": 1.1280936570022824e-08, |
| 29 | + "6": 1.1222852661225285e-08, |
| 30 | + "7": 1.1764614397655721e-08, |
| 31 | + "8": 1.1478465778920576e-08, |
| 32 | + "9": 1.1780701596308656e-08, |
| 33 | + "10": 1.3365134257075317e-08, |
| 34 | + "11": 1.1719334320833283e-08, |
| 35 | + "12": 1.305017186986983e-08, |
| 36 | + "13": 1.0914860554958317e-08, |
| 37 | + "14": 1.119730771394731e-08, |
| 38 | + "15": 1.3835785893339787e-08, |
| 39 | + "16": 1.4834607113882717e-08, |
| 40 | + "17": 1.582489036239487e-08, |
| 41 | + "18": 1.5075956950023575e-08, |
| 42 | + "19": 1.8220141872466202e-08, |
| 43 | + "20": 1.7178269031631664e-08, |
| 44 | + "21": 1.3045214034879191e-08, |
| 45 | + "22": 1.4445022767788226e-08, |
| 46 | + "X": 1.164662223273842e-08, |
| 47 | + "Y": 0.0, |
| 48 | + "MT": 0.0, |
| 49 | +} |
56 | 50 |
|
57 | 51 | _genome2001 = stdpopsim.Citation(
|
58 | 52 | doi="http://dx.doi.org/10.1038/35057062",
|
|
89 | 83 | )
|
90 | 84 |
|
91 | 85 | _chromosomes = []
|
92 |
| -for line in _chromosome_data.splitlines(): |
93 |
| - name, length, mean_rr = line.split()[:3] |
| 86 | +for name, data in HomSap_genome_data.data["chromosomes"].items(): |
94 | 87 | _chromosomes.append(stdpopsim.Chromosome(
|
95 |
| - id=name, length=int(length), |
| 88 | + id=name, length=int(data["length"]), |
96 | 89 | mutation_rate=1.29e-8,
|
97 |
| - recombination_rate=float(mean_rr))) |
| 90 | + recombination_rate=_recombination_rate_data[name] |
| 91 | + )) |
98 | 92 |
|
99 | 93 | _genome = stdpopsim.Genome(
|
100 | 94 | chromosomes=_chromosomes,
|
101 | 95 | mutation_rate_citations=[
|
102 | 96 | _tian2019.because(stdpopsim.CiteReason.MUT_RATE)],
|
103 | 97 | recombination_rate_citations=[
|
104 | 98 | _hapmap2007.because(stdpopsim.CiteReason.REC_RATE)],
|
| 99 | + assembly_name=HomSap_genome_data.data["assembly_name"], |
| 100 | + assembly_accession=HomSap_genome_data.data["assembly_accession"], |
105 | 101 | assembly_citations=[
|
106 | 102 | _genome2001])
|
107 | 103 |
|
|
0 commit comments