diff --git a/config/config.yaml b/config/config.yaml deleted file mode 100644 index 1e524ed5f..000000000 --- a/config/config.yaml +++ /dev/null @@ -1,4 +0,0 @@ -defaults: - - datasets: gcp - - step/session: dataproc - - step: ??? diff --git a/config/datasets/gcp.yaml b/config/datasets/ot_gcp.yaml similarity index 100% rename from config/datasets/gcp.yaml rename to config/datasets/ot_gcp.yaml diff --git a/config/ot_config.yaml b/config/ot_config.yaml new file mode 100644 index 000000000..7f28a58d6 --- /dev/null +++ b/config/ot_config.yaml @@ -0,0 +1,5 @@ +defaults: + - config + - datasets: ot_gcp + - _self_ + - override step/session: dataproc diff --git a/config/step/finngen_sumstat_preprocess.yaml b/config/step/finngen_sumstat_preprocess.yaml deleted file mode 100644 index 319e7af63..000000000 --- a/config/step/finngen_sumstat_preprocess.yaml +++ /dev/null @@ -1,3 +0,0 @@ -_target_: otg.finngen_sumstat_preprocess.FinnGenSumstatPreprocessStep -raw_sumstats_path: ??? -out_sumstats_path: ??? diff --git a/config/step/gwas_catalog_sumstat_preprocess.yaml b/config/step/gwas_catalog_sumstat_preprocess.yaml deleted file mode 100644 index 29d3486e8..000000000 --- a/config/step/gwas_catalog_sumstat_preprocess.yaml +++ /dev/null @@ -1,3 +0,0 @@ -_target_: otg.gwas_catalog_sumstat_preprocess.GWASCatalogSumstatsPreprocessStep -raw_sumstats_path: ??? -out_sumstats_path: ??? 
diff --git a/config/step/ld_index.yaml b/config/step/ld_index.yaml deleted file mode 100644 index 86f595076..000000000 --- a/config/step/ld_index.yaml +++ /dev/null @@ -1,6 +0,0 @@ -_target_: otg.ld_index.LDIndexStep - -session: - start_hail: true - -ld_index_out: ${datasets.ld_index} diff --git a/config/step/colocalisation.yaml b/config/step/ot_colocalisation.yaml similarity index 74% rename from config/step/colocalisation.yaml rename to config/step/ot_colocalisation.yaml index 2a4062a56..97cfa1bea 100644 --- a/config/step/colocalisation.yaml +++ b/config/step/ot_colocalisation.yaml @@ -1,4 +1,4 @@ _target_: otg.colocalisation.ColocalisationStep -credible_set_path: ${datasets.credible_set} +credible_set_path: ${datasets.study_locus} study_index_path: ${datasets.study_index} coloc_path: ${datasets.colocalisation} diff --git a/config/step/eqtl_catalogue.yaml b/config/step/ot_eqtl_catalogue.yaml similarity index 82% rename from config/step/eqtl_catalogue.yaml rename to config/step/ot_eqtl_catalogue.yaml index 04a958993..141c94ee3 100644 --- a/config/step/eqtl_catalogue.yaml +++ b/config/step/ot_eqtl_catalogue.yaml @@ -1,4 +1,6 @@ -_target_: otg.eqtl_catalogue.EqtlCatalogueStep +defaults: + - eqtl_catalogue + eqtl_catalogue_paths_imported: ${datasets.eqtl_catalogue_paths_imported} eqtl_catalogue_study_index_out: ${datasets.eqtl_catalogue_study_index_out} eqtl_catalogue_summary_stats_out: ${datasets.eqtl_catalogue_summary_stats_out} diff --git a/config/step/finngen_studies.yaml b/config/step/ot_finngen_studies.yaml similarity index 53% rename from config/step/finngen_studies.yaml rename to config/step/ot_finngen_studies.yaml index 23b58c443..673e0f269 100644 --- a/config/step/finngen_studies.yaml +++ b/config/step/ot_finngen_studies.yaml @@ -1,2 +1,4 @@ -_target_: otg.finngen_studies.FinnGenStudiesStep +defaults: + - finngen_studies + finngen_study_index_out: ${datasets.finngen_study_index} diff --git a/config/step/ot_finngen_sumstat_preprocess.yaml 
b/config/step/ot_finngen_sumstat_preprocess.yaml new file mode 100644 index 000000000..ad0e93a09 --- /dev/null +++ b/config/step/ot_finngen_sumstat_preprocess.yaml @@ -0,0 +1,5 @@ +defaults: + - finngen_sumstat_preprocess + +raw_sumstats_path: ??? +out_sumstats_path: ??? diff --git a/config/step/gene_index.yaml b/config/step/ot_gene_index.yaml similarity index 66% rename from config/step/gene_index.yaml rename to config/step/ot_gene_index.yaml index 20de8dfbe..ce5971bf9 100644 --- a/config/step/gene_index.yaml +++ b/config/step/ot_gene_index.yaml @@ -1,3 +1,5 @@ -_target_: otg.gene_index.GeneIndexStep +defaults: + - gene_index + target_path: ${datasets.target_index} gene_index_path: ${datasets.gene_index} diff --git a/config/step/gwas_catalog_ingestion.yaml b/config/step/ot_gwas_catalog_ingestion.yaml similarity index 88% rename from config/step/gwas_catalog_ingestion.yaml rename to config/step/ot_gwas_catalog_ingestion.yaml index 66fe37ce9..65606b7e4 100644 --- a/config/step/gwas_catalog_ingestion.yaml +++ b/config/step/ot_gwas_catalog_ingestion.yaml @@ -1,4 +1,6 @@ -_target_: otg.gwas_catalog_ingestion.GWASCatalogIngestionStep +defaults: + - gwas_catalog_ingestion + catalog_study_files: ${datasets.catalog_studies} catalog_ancestry_files: ${datasets.catalog_ancestries} catalog_associations_file: ${datasets.catalog_associations} diff --git a/config/step/gwas_catalog_curation_update.yaml b/config/step/ot_gwas_catalog_study_curation.yaml similarity index 79% rename from config/step/gwas_catalog_curation_update.yaml rename to config/step/ot_gwas_catalog_study_curation.yaml index 979439153..eb6c0ec78 100644 --- a/config/step/gwas_catalog_curation_update.yaml +++ b/config/step/ot_gwas_catalog_study_curation.yaml @@ -1,4 +1,6 @@ -_target_: otg.gwas_catalog_study_curation.GWASCatalogStudyCurationStep +defaults: + - gwas_catalog_study_curation + catalog_study_files: ${datasets.catalog_studies} catalog_ancestry_files: ${datasets.catalog_ancestries} catalog_sumstats_lut: 
${datasets.catalog_sumstats_lut} diff --git a/config/step/gwas_study_inclusion.yaml b/config/step/ot_gwas_catalog_study_inclusion.yaml similarity index 84% rename from config/step/gwas_study_inclusion.yaml rename to config/step/ot_gwas_catalog_study_inclusion.yaml index 60916381b..8a560127e 100644 --- a/config/step/gwas_study_inclusion.yaml +++ b/config/step/ot_gwas_catalog_study_inclusion.yaml @@ -1,4 +1,6 @@ -_target_: otg.gwas_catalog_study_inclusion.GWASCatalogInclusionGenerator +defaults: + - gwas_catalog_study_inclusion + catalog_study_files: ${datasets.catalog_studies} catalog_ancestry_files: ${datasets.catalog_ancestries} catalog_associations_file: ${datasets.catalog_associations} diff --git a/config/step/ot_gwas_catalog_sumstat_preprocess.yaml b/config/step/ot_gwas_catalog_sumstat_preprocess.yaml new file mode 100644 index 000000000..d0c936807 --- /dev/null +++ b/config/step/ot_gwas_catalog_sumstat_preprocess.yaml @@ -0,0 +1,5 @@ +defaults: + - gwas_catalog_sumstat_preprocess + +raw_sumstats_path: ??? +out_sumstats_path: ??? diff --git a/config/step/ld_based_clumping.yaml b/config/step/ot_ld_based_clumping.yaml similarity index 67% rename from config/step/ld_based_clumping.yaml rename to config/step/ot_ld_based_clumping.yaml index eac354e4f..d25ca84b7 100644 --- a/config/step/ld_based_clumping.yaml +++ b/config/step/ot_ld_based_clumping.yaml @@ -1,4 +1,6 @@ -_target_: otg.ld_based_clumping.LdBasedClumpingStep +defaults: + - ld_based_clumping + study_locus_input_path: ??? ld_index_path: ??? study_index_path: ??? 
diff --git a/config/step/ot_ld_index.yaml b/config/step/ot_ld_index.yaml new file mode 100644 index 000000000..70dc6b5ee --- /dev/null +++ b/config/step/ot_ld_index.yaml @@ -0,0 +1,4 @@ +defaults: + - ld_index + +ld_index_out: ${datasets.ld_index} diff --git a/config/step/locus_to_gene.yaml b/config/step/ot_locus_to_gene.yaml similarity index 82% rename from config/step/locus_to_gene.yaml rename to config/step/ot_locus_to_gene.yaml index 74c23b3d2..d055621ca 100644 --- a/config/step/locus_to_gene.yaml +++ b/config/step/ot_locus_to_gene.yaml @@ -1,8 +1,6 @@ -_target_: otg.l2g.LocusToGeneStep +defaults: + - locus_to_gene -session: - extended_spark_conf: - spark.dynamicAllocation.enabled: false run_mode: train wandb_run_name: null perform_cross_validation: false diff --git a/config/step/pics.yaml b/config/step/ot_pics.yaml similarity index 68% rename from config/step/pics.yaml rename to config/step/ot_pics.yaml index 4a656a876..851c4ca06 100644 --- a/config/step/pics.yaml +++ b/config/step/ot_pics.yaml @@ -1,3 +1,5 @@ -_target_: otg.pics.PICSStep +defaults: + - pics + study_locus_ld_annotated_in: ??? picsed_study_locus_out: ??? 
diff --git a/config/step/study_locus_overlap.yaml b/config/step/ot_study_locus_overlap.yaml similarity index 79% rename from config/step/study_locus_overlap.yaml rename to config/step/ot_study_locus_overlap.yaml index 02f9cd83d..acfcbdc94 100644 --- a/config/step/study_locus_overlap.yaml +++ b/config/step/ot_study_locus_overlap.yaml @@ -1,4 +1,6 @@ -_target_: otg.overlaps.OverlapsIndexStep +defaults: + - overlaps + study_locus_path: ${datasets.outputs}/credible_set study_index_path: ${datasets.outputs}/study_index overlaps_index_out: ${datasets.outputs}/study_locus_overlap diff --git a/config/step/ukbiobank.yaml b/config/step/ot_ukbiobank.yaml similarity index 74% rename from config/step/ukbiobank.yaml rename to config/step/ot_ukbiobank.yaml index fc7d7fb19..a1dcb6d9f 100644 --- a/config/step/ukbiobank.yaml +++ b/config/step/ot_ukbiobank.yaml @@ -1,3 +1,4 @@ -_target_: otg.ukbiobank.UKBiobankStep +defaults: + - ukbiobank ukbiobank_manifest: ${datasets.ukbiobank_manifest} ukbiobank_study_index_out: ${datasets.ukbiobank_study_index} diff --git a/config/step/v2g.yaml b/config/step/ot_v2g.yaml similarity index 88% rename from config/step/v2g.yaml rename to config/step/ot_v2g.yaml index bdd003861..1ac6d2fbe 100644 --- a/config/step/v2g.yaml +++ b/config/step/ot_v2g.yaml @@ -1,10 +1,12 @@ -_target_: otg.v2g.V2GStep +defaults: + - variant_to_gene + variant_index_path: ${datasets.variant_index} variant_annotation_path: ${datasets.variant_annotation} gene_index_path: ${datasets.gene_index} vep_consequences_path: ${datasets.vep_consequences} liftover_chain_file_path: ${datasets.chain_37_38} -intervals: +interval_sources: andersson: ${datasets.anderson} javierre: ${datasets.javierre} jung: ${datasets.jung} diff --git a/config/step/ot_variant_annotation.yaml b/config/step/ot_variant_annotation.yaml new file mode 100644 index 000000000..5da76b398 --- /dev/null +++ b/config/step/ot_variant_annotation.yaml @@ -0,0 +1,4 @@ +defaults: + - variant_annotation + 
+variant_annotation_path: ${datasets.variant_annotation} diff --git a/config/step/variant_index.yaml b/config/step/ot_variant_index.yaml similarity index 76% rename from config/step/variant_index.yaml rename to config/step/ot_variant_index.yaml index 88f84690b..1625c7126 100644 --- a/config/step/variant_index.yaml +++ b/config/step/ot_variant_index.yaml @@ -1,4 +1,6 @@ -_target_: otg.variant_index.VariantIndexStep +defaults: + - variant_index + variant_annotation_path: ${datasets.variant_annotation} credible_set_path: ${datasets.study_locus} variant_index_path: ${datasets.variant_index} diff --git a/config/step/window_based_clumping.yaml b/config/step/ot_window_based_clumping.yaml similarity index 50% rename from config/step/window_based_clumping.yaml rename to config/step/ot_window_based_clumping.yaml index 55cb952c8..0e7e0e39b 100644 --- a/config/step/window_based_clumping.yaml +++ b/config/step/ot_window_based_clumping.yaml @@ -1,5 +1,6 @@ -_target_: otg.window_based_clumping.WindowBasedClumpingStep +defaults: + - window_based_clumping + summary_statistics_input_path: ??? study_locus_output_path: ??? inclusion_list_path: ??? -locus_collect_distance: null diff --git a/config/step/session/dataproc.yaml b/config/step/session/dataproc.yaml index 0c8b1208a..6ac641718 100644 --- a/config/step/session/dataproc.yaml +++ b/config/step/session/dataproc.yaml @@ -1,4 +1,5 @@ -_target_: otg.common.session.Session +defaults: + - base_session + spark_uri: yarn -hail_home: /opt/conda/miniconda3/lib/python3.10/site-packages/hail write_mode: errorifexists diff --git a/config/step/session/local.yaml b/config/step/session/local.yaml deleted file mode 100644 index ac7f42591..000000000 --- a/config/step/session/local.yaml +++ /dev/null @@ -1,4 +0,0 @@ -_target_: otg.common.session.Session -spark_uri: local[*] -hail_home: ??? 
-write_mode: errorifexists diff --git a/config/step/variant_annotation.yaml b/config/step/variant_annotation.yaml deleted file mode 100644 index 1a8042ddb..000000000 --- a/config/step/variant_annotation.yaml +++ /dev/null @@ -1,6 +0,0 @@ -_target_: otg.variant_annotation.VariantAnnotationStep - -session: - start_hail: true - -variant_annotation_path: ${datasets.variant_annotation} diff --git a/docs/development/airflow.md b/docs/development/airflow.md index 9768bf961..b73ad614e 100644 --- a/docs/development/airflow.md +++ b/docs/development/airflow.md @@ -7,12 +7,12 @@ This section describes how to set up a local Airflow server which will orchestra - [Docker](https://docs.docker.com/get-docker/) - [Google Cloud SDK](https://cloud.google.com/sdk/docs/install) -!!!warning macOS Docker memory allocation +!!! warning macOS Docker memory allocation On macOS, the default amount of memory available for Docker might not be enough to get Airflow up and running. Allocate at least 4GB of memory for the Docker Engine (ideally 8GB). [More info](https://airflow.apache.org/docs/apache-airflow/stable/howto/docker-compose/index.html#) ## Configure Airflow access to Google Cloud Platform -!!!warning Specifying Google Cloud parameters +!!! warning Specifying Google Cloud parameters Run the next two command with the appropriate Google Cloud project ID and service account name to ensure the correct Google default application credentials are set up. Authenticate to Google Cloud: @@ -37,7 +37,7 @@ cd src/airflow ### Build Docker image -!!!note Custom Docker image for Airflow +!!! note Custom Docker image for Airflow The custom Dockerfile built by the command below extends the official [Airflow Docker Compose YAML](https://airflow.apache.org/docs/apache-airflow/stable/docker-compose.yaml). We add support for Google Cloud SDK, Google Dataproc operators, and access to GCP credentials. ```bash @@ -46,7 +46,7 @@ docker build . 
--tag extending_airflow:latest ### Set Airflow user ID -!!!note Setting Airflow user ID +!!! note Setting Airflow user ID These commands allow Airflow running inside Docker to access the credentials file which was generated earlier. ```bash diff --git a/docs/howto/.pages b/docs/howto/.pages new file mode 100644 index 000000000..eb8058f0f --- /dev/null +++ b/docs/howto/.pages @@ -0,0 +1 @@ +title: How-to diff --git a/docs/howto/_howto.md b/docs/howto/_howto.md new file mode 100644 index 000000000..1e8e26b83 --- /dev/null +++ b/docs/howto/_howto.md @@ -0,0 +1,5 @@ +# How-to + +This page contains a collection of how-to guides for the project. + +For additional information please visit [https://community.opentargets.org/](https://community.opentargets.org/) diff --git a/docs/howto/run_step_in_cli.md b/docs/howto/run_step_in_cli.md new file mode 100644 index 000000000..925a17f28 --- /dev/null +++ b/docs/howto/run_step_in_cli.md @@ -0,0 +1,44 @@ +--- +Title: Run step in CLI +--- + +# Run step in CLI + +To run a step in the command line interface (CLI), you need to know the step's name. To list what steps are available in your current environment, simply run `otg` with no arguments. This will list all the steps: + +``` +You must specify 'step', e.g, step=