opentargets · ireneisdoomed · Jan 12, 2024 · Jan 12, 2024 · Jan 12, 2024 · Jan 12, 2024
diff --git a/src/otg/dataset/l2g_prediction.py b/src/otg/dataset/l2g_prediction.py
@@ -49,7 +49,7 @@ def from_credible_set(
         v2g: V2G,
         coloc: Colocalisation,
     ) -> L2GPrediction:
-        """Initialise L2G from feature matrix.
+        """Extract L2G predictions for GWAS credible sets, dropping all other loci.
 
         Args:
             model_path (str): Path to the fitted model
@@ -62,9 +62,17 @@ def from_credible_set(
         Returns:
             L2GPrediction: L2G dataset
         """
+        gwas_study_locus = StudyLocus(
+            _df=study_locus.df.join(
+                study_index.study_type_lut().filter(f.col("studyType") == "gwas"),
+                on="studyId",
+                how="inner",
+            ),
+            _schema=StudyLocus.get_schema(),
+        )
         fm = L2GFeatureMatrix.generate_features(
             features_list=features_list,
-            study_locus=study_locus,
+            study_locus=gwas_study_locus,
             study_index=study_index,
             variant_gene=v2g,
             colocalisation=coloc,

diff --git a/src/otg/l2g.py b/src/otg/l2g.py
@@ -75,7 +75,17 @@ def __init__(
         v2g = V2G.from_parquet(session, variant_gene_path)
         coloc = Colocalisation.from_parquet(session, colocalisation_path)
 
-        if run_mode == "train":
+        if run_mode == "predict":
+            if not model_path or not predictions_path:
+                raise ValueError(
+                    "model_path and predictions_path must be set for predict mode."
+                )
+            predictions = L2GPrediction.from_credible_set(
+                model_path, features_list, credible_set, studies, v2g, coloc
+            )
+            predictions.df.write.mode(session.write_mode).parquet(predictions_path)
+            session.logger.info(predictions_path)
+        elif run_mode == "train":
             # Process gold standard and L2G features
             gs_curation = session.spark.read.json(gold_standard_curation_path)
             interactions = session.spark.read.parquet(gene_interactions_path)
@@ -160,14 +170,3 @@ def __init__(
                     **hyperparameters,
                 )
                 session.logger.info(model_path)
-
-        if run_mode == "predict":
-            if not model_path or not predictions_path:
-                raise ValueError(
-                    "model_path and predictions_path must be set for predict mode."
-                )
-            predictions = L2GPrediction.from_credible_set(
-                model_path, features_list, credible_set, studies, v2g, coloc
-            )
-            predictions.df.write.mode(session.write_mode).parquet(predictions_path)
-            session.logger.info(predictions_path)