Skip to content

Commit

Permalink
Merge branch 'ah_var_store' into rc-vs-268-import-more-samples
Browse files Browse the repository at this point in the history
  • Loading branch information
RoriCremer authored Jan 24, 2022
2 parents 6c6a22c + 3aa74a5 commit 009c332
Show file tree
Hide file tree
Showing 8 changed files with 636 additions and 14 deletions.
11 changes: 11 additions & 0 deletions .dockstore.yml
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ workflows:
- ah_var_store
- rsa_rescattering_wf
- rc_add_dropstate_extract_wdl
- kc_ranges_prepare
- name: GvsImportGenomes
subclass: WDL
primaryDescriptorPath: /scripts/variantstore/wdl/GvsImportGenomes.wdl
Expand All @@ -138,6 +139,16 @@ workflows:
- master
- ah_var_store
- ah_flag_in_prepare
- name: GvsPrepareRangesCallset
subclass: WDL
primaryDescriptorPath: /scripts/variantstore/wdl/GvsPrepareRangesCallset.wdl
testParameterFiles:
- /scripts/variantstore/wdl/GvsPrepareRangesCallset.example.inputs.json
filters:
branches:
- master
- ah_var_store
- kc_ranges_prepare
- name: GvsSitesOnlyVCF
subclass: WDL
primaryDescriptorPath: /scripts/variantstore/wdl/GvsSitesOnlyVCF.wdl
Expand Down
15 changes: 12 additions & 3 deletions scripts/variantstore/wdl/GvsExtractCallset.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -40,17 +40,20 @@ workflow GvsExtractCallset {
Int? extract_maxretries_override
Int? split_intervals_disk_size_override

String mode = "RANGES"
String mode = "RANGES-PREPARED"

String? service_account_json_path

String output_file_base_name
String? output_gcs_dir
File? gatk_override = "gs://broad-dsp-spec-ops/scratch/bigquery-jointcalling/jars/kc_fix_type_20211222/gatk-package-4.2.0.0-451-gbfb465a-SNAPSHOT-local.jar"
File? gatk_override = "gs://broad-dsp-spec-ops/scratch/bigquery-jointcalling/jars/kc_ranges_prepare_20220118/gatk-package-4.2.0.0-462-gc0e684c-SNAPSHOT-local.jar"
Int local_disk_for_extract = 150

String fq_samples_to_extract_table = "~{data_project}.~{default_dataset}.~{extract_table_prefix}__SAMPLES"
String fq_cohort_extract_table = "~{data_project}.~{default_dataset}.~{extract_table_prefix}__DATA"

String fq_ranges_cohort_vet_extract_table = "~{data_project}.~{default_dataset}.~{extract_table_prefix}__VET_DATA"
String fq_ranges_cohort_ref_extract_table = "~{data_project}.~{default_dataset}.~{extract_table_prefix}__REF_DATA"
}

Array[String] tables_patterns_for_datetime_check = if (mode == "RANGES") then ["pet_%","vet_%"] else ["~{extract_table_prefix}__%"]
Expand Down Expand Up @@ -87,6 +90,8 @@ workflow GvsExtractCallset {
interval_index = i,
intervals = SplitIntervals.interval_files[i],
fq_cohort_extract_table = fq_cohort_extract_table,
fq_ranges_cohort_ref_extract_table = fq_ranges_cohort_ref_extract_table,
fq_ranges_cohort_vet_extract_table = fq_ranges_cohort_vet_extract_table,
read_project_id = query_project,
mode = mode,
do_not_filter_override = do_not_filter_override,
Expand Down Expand Up @@ -147,6 +152,8 @@ task ExtractTask {
String? drop_state

String fq_cohort_extract_table
String fq_ranges_cohort_ref_extract_table
String fq_ranges_cohort_vet_extract_table
String read_project_id
String output_file
String? output_gcs_dir
Expand Down Expand Up @@ -208,8 +215,10 @@ task ExtractTask {
~{"--indels-truth-sensitivity-filter-level " + indels_truth_sensitivity_filter_level}'
fi

if [ ~{mode} = "RANGES" ]; then
if [ ~{mode} = "RANGES-RAW" ]; then
MODE_ARGS="--mode RANGES --vet-ranges-fq-dataset ~{fq_ranges_dataset} "
elif [ ~{mode} = "RANGES-PREPARED" ]; then
MODE_ARGS="--mode RANGES --vet-ranges-extract-fq-table ~{fq_ranges_cohort_vet_extract_table} --ref-ranges-extract-fq-table ~{fq_ranges_cohort_ref_extract_table} "
else
MODE_ARGS="--mode PET --cohort-extract-table ~{fq_cohort_extract_table} "
fi
Expand Down
150 changes: 150 additions & 0 deletions scripts/variantstore/wdl/GvsPrepareRangesCallset.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
version 1.0

# Prepares cohort-extract tables in BigQuery for the "ranges" data model.
# All work is delegated to PrepareRangesCallsetTask, which runs
# create_ranges_cohort_extract_data_table.py inside the variantstore docker image.
workflow GvsPrepareCallset {
input {
String data_project                      # BigQuery project holding the source pet/vet dataset
String default_dataset                   # BigQuery dataset holding the source tables
String destination_cohort_table_prefix   # prefix for the generated extract tables
File? sample_names_to_extract            # optional file of sample names; when absent, samples come from fq_sample_mapping_table

# inputs with defaults
String query_project = data_project
Array[String]? query_labels              # optional labels, forwarded as repeated --query_labels args
String destination_project = data_project
String destination_dataset = default_dataset

# Fully-qualified ("project.dataset[.table]") BigQuery names derived from the inputs above
String fq_petvet_dataset = "~{data_project}.~{default_dataset}"
String fq_sample_mapping_table = "~{data_project}.~{default_dataset}.sample_info"
String fq_temp_table_dataset = "~{destination_project}.temp_tables"
String fq_destination_dataset = "~{destination_project}.~{destination_dataset}"

Int temp_table_ttl_in_hours = 72         # forwarded to the python script's --ttl flag
Boolean skip_vet_new_insert = false      # NOTE(review): declared but never passed to the task below — confirm whether it is still needed
String? service_account_json_path        # optional GCS path to a service-account key JSON
String? docker                           # optional docker image override
}

# Default image pinned to the kc_ranges_prepare build; overridable via `docker`.
String docker_final = select_first([docker, "us.gcr.io/broad-dsde-methods/variantstore:kc_ranges_prepare_2022_01_18"])

call PrepareRangesCallsetTask {
input:
destination_cohort_table_prefix = destination_cohort_table_prefix,
sample_names_to_extract = sample_names_to_extract,
query_project = query_project,
query_labels = query_labels,
fq_petvet_dataset = fq_petvet_dataset,
fq_sample_mapping_table = fq_sample_mapping_table,
fq_temp_table_dataset = fq_temp_table_dataset,
fq_destination_dataset = fq_destination_dataset,
temp_table_ttl_in_hours = temp_table_ttl_in_hours,
service_account_json_path = service_account_json_path,
docker = docker_final
}

output {
# "project.dataset.prefix" — downstream extract steps append suffixes to this
String fq_cohort_extract_table_prefix = PrepareRangesCallsetTask.fq_cohort_extract_table_prefix
}

}

# Runs create_ranges_cohort_extract_data_table.py to build the cohort extract
# tables in BigQuery. Sample selection comes either from an explicit file of
# sample names or from the sample-mapping table.
task PrepareRangesCallsetTask {
# indicates that this task should NOT be call cached
meta {
volatile: true
}

input {
String destination_cohort_table_prefix   # prefix for the tables created under fq_destination_dataset
File? sample_names_to_extract            # optional sample-name list (localized manually via gsutil below)
String query_project                     # project billed for the BigQuery queries
Array[String]? query_labels              # optional labels applied to the BigQuery jobs

String fq_petvet_dataset                 # source "project.dataset" with the ranges vet/ref data
String fq_sample_mapping_table           # "project.dataset.sample_info" mapping table
String fq_temp_table_dataset             # dataset used by the script for intermediate tables
String fq_destination_dataset            # dataset that receives the final extract tables
Int temp_table_ttl_in_hours              # TTL for the intermediate tables (--ttl)

String? service_account_json_path        # optional GCS path to a service-account key JSON
String docker
}
# Note the coercion of optional query_labels using select_first([expr, default])
Array[String] query_label_args = if defined(query_labels) then prefix("--query_labels ", select_first([query_labels])) else []

# Shell-friendly flags/fragments precomputed so the command below stays simple.
String has_service_account_file = if (defined(service_account_json_path)) then 'true' else 'false'
String use_sample_names_file = if (defined(sample_names_to_extract)) then 'true' else 'false'
String sample_list_param = if (defined(sample_names_to_extract)) then '--sample_names_to_extract sample_names_file' else '--fq_cohort_sample_names ' + fq_sample_mapping_table

parameter_meta {
# localized by hand with gsutil inside the command, not by Cromwell
sample_names_to_extract: {
localization_optional: true
}
}

command <<<
set -e

echo ~{sample_list_param}

if [ ~{has_service_account_file} = 'true' ]; then
gsutil cp ~{service_account_json_path} local.service_account.json
SERVICE_ACCOUNT_STANZA="--sa_key_path local.service_account.json "
fi

if [ ~{use_sample_names_file} = 'true' ]; then
gsutil cp ~{sample_names_to_extract} sample_names_file
fi

python3 /app/create_ranges_cohort_extract_data_table.py \
--fq_ranges_dataset ~{fq_petvet_dataset} \
--fq_temp_table_dataset ~{fq_temp_table_dataset} \
--fq_destination_dataset ~{fq_destination_dataset} \
--destination_cohort_table_prefix ~{destination_cohort_table_prefix} \
~{sample_list_param} \
--query_project ~{query_project} \
~{sep=" " query_label_args} \
--fq_sample_mapping_table ~{fq_sample_mapping_table} \
--ttl ~{temp_table_ttl_in_hours} \
$SERVICE_ACCOUNT_STANZA
>>>

output {
String fq_cohort_extract_table_prefix = "~{fq_destination_dataset}.~{destination_cohort_table_prefix}" # implementation detail of create_ranges_cohort_extract_data_table.py
}

runtime {
docker: docker
memory: "3 GB"
disks: "local-disk 100 HDD"
bootDiskSizeGb: 15
preemptible: 0
cpu: 1
}

}

# Copies a single GCS file to local disk, authenticating with a
# service-account key first so non-default credentials can be used.
# NOTE(review): not called by the workflow in this file — presumably kept for
# use by other workflows; confirm before removing.
task LocalizeFile {
input {
String file                       # GCS path of the file to localize
String service_account_json_path  # GCS path of the service-account key JSON used for auth
}

command {
set -euo pipefail

gsutil cp ~{service_account_json_path} local.service_account.json
gcloud auth activate-service-account --key-file=local.service_account.json
gsutil cp '~{file}' .
}

output {
# the copied file, found by its basename in the task working directory
File localized_file = basename(file)
}

runtime {
docker: "gcr.io/google.com/cloudsdktool/cloud-sdk:305.0.0"
memory: "3.75 GiB"
cpu: "1"
disks: "local-disk 50 HDD"
}
}
1 change: 1 addition & 0 deletions scripts/variantstore/wdl/extract/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ RUN apt-get update && apt-get -y upgrade && apt-get -y install bcftools
# Copy the application source code.
COPY create_cohort_extract_data_table.py /app
COPY create_variant_annotation_table.py /app
COPY create_ranges_cohort_extract_data_table.py /app
COPY extract_subpop.py /app
COPY populate_alt_allele_table.py /app
COPY alt_allele_positions.sql /app
Expand Down
Loading

0 comments on commit 009c332

Please sign in to comment.