Skip to content

Commit

Permalink
Merge branch 'main' into il-gcat-preprocess-dag
Browse files Browse the repository at this point in the history
  • Loading branch information
d0choa committed Dec 12, 2023
2 parents 062fc65 + ee73572 commit 509557e
Show file tree
Hide file tree
Showing 39 changed files with 555 additions and 438 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/docs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
with:
fetch-depth: 0
- name: Set up Python
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: 3.10.8
- name: Install and configure Poetry
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/pytest.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ jobs:
with:
fetch-depth: 1
- name: Set up Python
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: 3.10.8
- name: Install and configure Poetry
Expand Down
6 changes: 5 additions & 1 deletion docs/python_api/datasource/gwas_catalog/associations.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,8 @@
title: Associations
---

::: otg.datasource.gwas_catalog.associations.GWASCatalogAssociations
::: otg.datasource.gwas_catalog.associations.GWASCatalogCuratedAssociationsParser

---

::: otg.datasource.gwas_catalog.associations.StudyLocusGWASCatalog
6 changes: 5 additions & 1 deletion docs/python_api/datasource/gwas_catalog/study_index.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,8 @@
title: Study Index
---

::: otg.datasource.gwas_catalog.study_index.GWASCatalogStudyIndex
::: otg.datasource.gwas_catalog.study_index.StudyIndexGWASCatalogParser

---

::: otg.datasource.gwas_catalog.study_index.StudyIndexGWASCatalog
136 changes: 127 additions & 9 deletions poetry.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ pyliftover = "^0.4"
xgboost = "^1.7.3"
numpy = "^1.26.1"
hail = "0.2.126"
wandb = "^0.16.0"
wandb = "^0.16.1"
google = "^3.0.0"
omegaconf = "^2.3.0"
typing-extensions = "^4.8.0"
Expand Down
86 changes: 54 additions & 32 deletions src/otg/assets/schemas/variant_annotation.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
"metadata": {}
},
{
"name": "gnomad3VariantId",
"name": "gnomadVariantId",
"type": "string",
"nullable": false,
"metadata": {}
Expand Down Expand Up @@ -92,26 +92,72 @@
"metadata": {}
},
{
"name": "cadd",
"name": "inSilicoPredictors",
"nullable": false,
"metadata": {},
"type": {
"type": "struct",
"fields": [
{
"name": "phred",
"type": "float",
"name": "cadd",
"nullable": true,
"metadata": {},
"type": {
"type": "struct",
"fields": [
{
"name": "raw",
"type": "float",
"nullable": true,
"metadata": {}
},
{
"name": "phred",
"type": "float",
"nullable": true,
"metadata": {}
}
]
}
},
{
"name": "revelMax",
"type": "double",
"nullable": true,
"metadata": {}
},
{
"name": "raw",
"name": "spliceaiDsMax",
"type": "float",
"nullable": true,
"metadata": {}
},
{
"name": "pangolinLargestDs",
"type": "double",
"nullable": true,
"metadata": {}
},
{
"name": "phylop",
"type": "double",
"nullable": true,
"metadata": {}
},
{
"name": "siftMax",
"type": "double",
"nullable": true,
"metadata": {}
},
{
"name": "polyphenMax",
"type": "double",
"nullable": true,
"metadata": {}
}
]
},
"nullable": true,
"metadata": {}
}
},
{
"name": "vep",
Expand Down Expand Up @@ -158,30 +204,6 @@
"type": "string",
"nullable": true,
"metadata": {}
},
{
"name": "polyphenScore",
"type": "double",
"nullable": true,
"metadata": {}
},
{
"name": "polyphenPrediction",
"type": "string",
"nullable": true,
"metadata": {}
},
{
"name": "siftScore",
"type": "double",
"nullable": true,
"metadata": {}
},
{
"name": "siftPrediction",
"type": "string",
"nullable": true,
"metadata": {}
}
]
},
Expand Down
60 changes: 53 additions & 7 deletions src/otg/assets/schemas/variant_index.json
Original file line number Diff line number Diff line change
Expand Up @@ -76,26 +76,72 @@
"metadata": {}
},
{
"name": "cadd",
"name": "inSilicoPredictors",
"nullable": false,
"metadata": {},
"type": {
"type": "struct",
"fields": [
{
"name": "phred",
"type": "float",
"name": "cadd",
"nullable": true,
"metadata": {},
"type": {
"type": "struct",
"fields": [
{
"name": "raw",
"type": "float",
"nullable": true,
"metadata": {}
},
{
"name": "phred",
"type": "float",
"nullable": true,
"metadata": {}
}
]
}
},
{
"name": "revelMax",
"type": "double",
"nullable": true,
"metadata": {}
},
{
"name": "raw",
"name": "spliceaiDsMax",
"type": "float",
"nullable": true,
"metadata": {}
},
{
"name": "pangolinLargestDs",
"type": "double",
"nullable": true,
"metadata": {}
},
{
"name": "phylop",
"type": "double",
"nullable": true,
"metadata": {}
},
{
"name": "siftMax",
"type": "double",
"nullable": true,
"metadata": {}
},
{
"name": "polyphenMax",
"type": "double",
"nullable": true,
"metadata": {}
}
]
},
"nullable": true,
"metadata": {}
}
},
{
"name": "mostSevereConsequence",
Expand Down
8 changes: 4 additions & 4 deletions src/otg/dataset/study_locus.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ def _align_overlapping_tags(
)

@staticmethod
def _update_quality_flag(
def update_quality_flag(
qc: Column, flag_condition: Column, flag_text: StudyLocusQualityCheck
) -> Column:
"""Update the provided quality control list with a new flag if condition is met.
Expand Down Expand Up @@ -410,7 +410,7 @@ def clump(self: StudyLocus) -> StudyLocus:
)
.withColumn(
"qualityControls",
StudyLocus._update_quality_flag(
StudyLocus.update_quality_flag(
f.col("qualityControls"),
f.col("is_lead_linked"),
StudyLocusQualityCheck.LD_CLUMPED,
Expand All @@ -430,7 +430,7 @@ def _qc_unresolved_ld(
"""
self.df = self.df.withColumn(
"qualityControls",
self._update_quality_flag(
self.update_quality_flag(
f.col("qualityControls"),
f.col("ldSet").isNull(),
StudyLocusQualityCheck.UNRESOLVED_LD,
Expand All @@ -450,7 +450,7 @@ def _qc_no_population(self: StudyLocus) -> StudyLocus:

self.df = self.df.withColumn(
"qualityControls",
self._update_quality_flag(
self.update_quality_flag(
f.col("qualityControls"),
f.col("ldPopulationStructure").isNull(),
StudyLocusQualityCheck.NO_POPULATION,
Expand Down
61 changes: 0 additions & 61 deletions src/otg/dataset/variant_annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,67 +136,6 @@ def get_most_severe_vep_v2g(
_schema=V2G.get_schema(),
)

def get_polyphen_v2g(
    self: VariantAnnotation, gene_index: GeneIndex | None = None
) -> V2G:
    """Build variant-to-gene assignments scored with PolyPhen's transcript-level prediction.

    PolyPhen reports the probability that a substitution is damaging. The score can be interpreted as follows:
        - 0.0 to 0.15 -- Predicted to be benign.
        - 0.15 to 1.0 -- Possibly damaging.
        - 0.85 to 1.0 -- Predicted to be damaging.

    Args:
        gene_index (GeneIndex | None): A gene index to filter by. Defaults to None.

    Returns:
        V2G: variant to gene assignments with their polyphen scores
    """
    polyphen_score = f.col("transcriptConsequence.polyphenScore")

    # Transcript consequences without a PolyPhen score are dropped.
    scored_consequences = self.get_transcript_consequence_df(gene_index).filter(
        polyphen_score.isNotNull()
    )

    # The PolyPhen score is used as-is; datatype/datasource are constant labels.
    v2g_df = scored_consequences.select(
        "variantId",
        "chromosome",
        "geneId",
        polyphen_score.alias("score"),
        f.lit("vep").alias("datatypeId"),
        f.lit("polyphen").alias("datasourceId"),
    )
    return V2G(_df=v2g_df, _schema=V2G.get_schema())

def get_sift_v2g(self: VariantAnnotation, gene_index: GeneIndex) -> V2G:
    """Build variant-to-gene assignments scored with SIFT's transcript-level prediction.

    SIFT reports the probability that a substitution is tolerated. The score can be interpreted as follows:
        - 0.0 to 0.05 -- Likely to be deleterious.
        - 0.05 to 1.0 -- Likely to be tolerated.

    The emitted score is `1 - siftScore`, so higher values correspond to lower SIFT scores.

    Args:
        gene_index (GeneIndex): A gene index to filter by.

    Returns:
        V2G: variant to gene assignments with their SIFT scores
    """
    # Transcript consequences without a SIFT score are dropped.
    scored_consequences = self.get_transcript_consequence_df(gene_index).filter(
        f.col("transcriptConsequence.siftScore").isNotNull()
    )

    # Invert the SIFT score; datatype/datasource are constant labels.
    inverted_sift = f.expr("1 - transcriptConsequence.siftScore")
    v2g_df = scored_consequences.select(
        "variantId",
        "chromosome",
        "geneId",
        inverted_sift.alias("score"),
        f.lit("vep").alias("datatypeId"),
        f.lit("sift").alias("datasourceId"),
    )
    return V2G(_df=v2g_df, _schema=V2G.get_schema())

def get_plof_v2g(self: VariantAnnotation, gene_index: GeneIndex) -> V2G:
"""Creates a dataset with variant to gene assignments with a flag indicating if the variant is predicted to be a loss-of-function variant by the LOFTEE algorithm.
Expand Down
2 changes: 1 addition & 1 deletion src/otg/dataset/variant_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def from_variant_annotation(
"positionB37",
"alleleType",
"alleleFrequencies",
"cadd",
"inSilicoPredictors",
]
va_slimmed = variant_annotation.filter_by_variant_df(
study_locus.unique_variants_in_locus()
Expand Down
Loading

0 comments on commit 509557e

Please sign in to comment.