Skip to content

Commit 26414c7

Browse files
Rough working version for HomSap
1 parent 7bd2e1d commit 26414c7

File tree

5 files changed

+80
-38
lines changed

5 files changed

+80
-38
lines changed

maintenance/__init__.py

+1
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,4 @@
22
Code used for internal maintenance tasks.
33
"""
44
from . net import *
5+
from . species_data import *

stdpopsim/catalog/HomSap.py

+34-38
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import msprime
88

99
import stdpopsim
10+
from . import HomSap_genome_data
1011

1112
logger = logging.getLogger(__name__)
1213

@@ -16,43 +17,36 @@
1617
#
1718
###########################################################
1819

19-
# List of chromosomes.
20-
21-
# FIXME: add mean mutation rate data to this table.
22-
# Name Length mean_recombination_rate mean_mutation_rate
23-
24-
# length information can be found here
25-
# <http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/chromInfo.txt.gz>
2620

2721
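# mean_recombination_rate was computed across all windows of the GRCh37 genetic map (NOTE(review): chromosome lengths now come from the GRCh38.p13 assembly data, so rates and lengths refer to different builds -- confirm this is acceptable)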
2822
# <ftp://ftp-trace.ncbi.nih.gov/1000genomes/ftp/technical/working/20110106_recombination_hotspots>
29-
_chromosome_data = """\
30-
chr1 249250621 1.1485597641285933e-08
31-
chr2 243199373 1.1054289277533446e-08
32-
chr3 198022430 1.1279585624662551e-08
33-
chr4 191154276 1.1231162636001008e-08
34-
chr5 180915260 1.1280936570022824e-08
35-
chr6 171115067 1.1222852661225285e-08
36-
chr7 159138663 1.1764614397655721e-08
37-
chr8 146364022 1.1478465778920576e-08
38-
chr9 141213431 1.1780701596308656e-08
39-
chr10 135534747 1.3365134257075317e-08
40-
chr11 135006516 1.1719334320833283e-08
41-
chr12 133851895 1.305017186986983e-08
42-
chr13 115169878 1.0914860554958317e-08
43-
chr14 107349540 1.119730771394731e-08
44-
chr15 102531392 1.3835785893339787e-08
45-
chr16 90354753 1.4834607113882717e-08
46-
chr17 81195210 1.582489036239487e-08
47-
chr18 78077248 1.5075956950023575e-08
48-
chr19 59128983 1.8220141872466202e-08
49-
chr20 63025520 1.7178269031631664e-08
50-
chr21 48129895 1.3045214034879191e-08
51-
chr22 51304566 1.4445022767788226e-08
52-
chrX 155270560 1.164662223273842e-08
53-
chrY 59373566 0.0
54-
"""
55-
23+
_recombination_rate_data = {
24+
"1": 1.1485597641285933e-08,
25+
"2": 1.1054289277533446e-08,
26+
"3": 1.1279585624662551e-08,
27+
"4": 1.1231162636001008e-08,
28+
"5": 1.1280936570022824e-08,
29+
"6": 1.1222852661225285e-08,
30+
"7": 1.1764614397655721e-08,
31+
"8": 1.1478465778920576e-08,
32+
"9": 1.1780701596308656e-08,
33+
"10": 1.3365134257075317e-08,
34+
"11": 1.1719334320833283e-08,
35+
"12": 1.305017186986983e-08,
36+
"13": 1.0914860554958317e-08,
37+
"14": 1.119730771394731e-08,
38+
"15": 1.3835785893339787e-08,
39+
"16": 1.4834607113882717e-08,
40+
"17": 1.582489036239487e-08,
41+
"18": 1.5075956950023575e-08,
42+
"19": 1.8220141872466202e-08,
43+
"20": 1.7178269031631664e-08,
44+
"21": 1.3045214034879191e-08,
45+
"22": 1.4445022767788226e-08,
46+
"X": 1.164662223273842e-08,
47+
"Y": 0.0,
48+
"MT": 0.0,
49+
}
5650

5751
_genome2001 = stdpopsim.Citation(
5852
doi="http://dx.doi.org/10.1038/35057062",
@@ -89,19 +83,21 @@
8983
)
9084

9185
_chromosomes = []
92-
for line in _chromosome_data.splitlines():
93-
name, length, mean_rr = line.split()[:3]
86+
for name, data in HomSap_genome_data.data["chromosomes"].items():
9487
_chromosomes.append(stdpopsim.Chromosome(
95-
id=name, length=int(length),
88+
id=name, length=int(data["length"]),
9689
mutation_rate=1.29e-8,
97-
recombination_rate=float(mean_rr)))
90+
recombination_rate=_recombination_rate_data[name]
91+
))
9892

9993
_genome = stdpopsim.Genome(
10094
chromosomes=_chromosomes,
10195
mutation_rate_citations=[
10296
_tian2019.because(stdpopsim.CiteReason.MUT_RATE)],
10397
recombination_rate_citations=[
10498
_hapmap2007.because(stdpopsim.CiteReason.REC_RATE)],
99+
assembly_name=HomSap_genome_data.data["assembly_name"],
100+
assembly_accession=HomSap_genome_data.data["assembly_accession"],
105101
assembly_citations=[
106102
_genome2001])
107103

+31
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
data = {
2+
"assembly_accession": "GCA_000001405.28",
3+
"assembly_name": "GRCh38.p13",
4+
"chromosomes": {
5+
"Y": {"length": 57227415},
6+
"20": {"length": 64444167},
7+
"X": {"length": 156040895},
8+
"13": {"length": 114364328},
9+
"22": {"length": 50818468},
10+
"10": {"length": 133797422},
11+
"6": {"length": 170805979},
12+
"19": {"length": 58617616},
13+
"14": {"length": 107043718},
14+
"18": {"length": 80373285},
15+
"2": {"length": 242193529},
16+
"4": {"length": 190214555},
17+
"21": {"length": 46709983},
18+
"9": {"length": 138394717},
19+
"11": {"length": 135086622},
20+
"17": {"length": 83257441},
21+
"8": {"length": 145138636},
22+
"7": {"length": 159345973},
23+
"15": {"length": 101991189},
24+
"12": {"length": 133275309},
25+
"1": {"length": 248956422},
26+
"16": {"length": 90338345},
27+
"5": {"length": 181538259},
28+
"3": {"length": 198295559},
29+
"MT": {"length": 16569},
30+
},
31+
}

stdpopsim/genomes.py

+2
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ class Genome(object):
3232
mutation_rate_citations = attr.ib(factory=list, kw_only=True)
3333
recombination_rate_citations = attr.ib(factory=list, kw_only=True)
3434
assembly_citations = attr.ib(factory=list, kw_only=True)
35+
assembly_name = attr.ib(default="TMP", kw_only=True)
36+
assembly_accession = attr.ib(default="TMP", kw_only=True)
3537
length = attr.ib(default=0, init=False)
3638

3739
def __attrs_post_init__(self):

update_ensembl_data.py

+12
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
import maintenance
2+
import json
3+
import pprint
4+
5+
6+
if __name__ == "__main__":
7+
8+
# TODO: create a dedicated HomSap directory so that the generated
9+
# genome_data.py file can live there instead of directly in stdpopsim/catalog/.
10+
with open("stdpopsim/catalog/HomSap_genome_data.py", "w") as f:
11+
data = maintenance.get_genome_data("homo_sapiens")
12+
print("data = ", data, file=f)

0 commit comments

Comments
 (0)