Skip to content

Commit

Permalink
Merge pull request #450 from bento-platform/features/documents-results-ingestion
Browse files Browse the repository at this point in the history

feat: document experiment results ingestion
  • Loading branch information
noctillion authored Oct 27, 2023
2 parents 2817fd8 + bccb03b commit c24e121
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 1 deletion.
21 changes: 21 additions & 0 deletions chord_metadata_service/chord/workflows/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
"WORKFLOW_EXPERIMENTS_JSON",
"WORKFLOW_FHIR_JSON",
"WORKFLOW_READSET",
"WORKFLOW_DOCUMENT",
"WORKFLOW_MAF_DERIVED_FROM_VCF_JSON",
"WORKFLOW_VCF2MAF",
"WORKFLOW_CBIOPORTAL",
Expand All @@ -29,6 +30,7 @@
WORKFLOW_EXPERIMENTS_JSON = "experiments_json"
WORKFLOW_FHIR_JSON = "fhir_json"
WORKFLOW_READSET = "readset"
WORKFLOW_DOCUMENT = "document"
WORKFLOW_MAF_DERIVED_FROM_VCF_JSON = "maf_derived_from_vcf_json"
WORKFLOW_VCF2MAF = "vcf2maf"
WORKFLOW_CBIOPORTAL = "cbioportal"
Expand Down Expand Up @@ -147,6 +149,25 @@ def json_file_output(id_: str, output_name: Optional[str] = None):
}
]
},
WORKFLOW_DOCUMENT: {
"name": "Document",
"description": "This workflow ingests into DRS documents which "
"have been already listed as experiment results. ",
"data_type": DATA_TYPE_EXPERIMENT_RESULT,
"file": "document.wdl",
"inputs": [
{
"id": "document_files",
"type": "file[]",
"required": True,
"extensions": [".pdf", ".csv", ".tsv", ".txt", ".doc", ".docx", ".xls", ".xlsx",
".jpeg", ".jpg", ".png", ".gif", ".md", ".mp3", ".m4a", ".mp4"]
},
DRS_URL_INPUT,
],
"outputs": [],

},
WORKFLOW_MAF_DERIVED_FROM_VCF_JSON: {
"name": "MAF files derived from VCF files as a JSON",
"description": "This ingestion workflow will add to the current experiment results "
Expand Down
48 changes: 48 additions & 0 deletions chord_metadata_service/chord/workflows/wdls/document.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
version 1.0

# Sends every file in `document_files` to DRS, one `post_to_drs` call per file,
# and collects the text each call returns.
workflow document {
    input {
        Array[File] document_files
        # Not referenced anywhere in this workflow; presumably supplied by the
        # workflow runner for every workflow - confirm before removing.
        String run_dir
        String drs_url
        String project_id
        String dataset_id
        # Forwarded to the task as its `token` input.
        String secret__access_token
    }

    # One independent upload per document; the scatter gathers the per-call
    # outputs into an array.
    scatter (document_file in document_files) {
        call post_to_drs {
            input:
                file_path = document_file,
                drs_url = drs_url,
                project_id = project_id,
                dataset_id = dataset_id,
                token = secret__access_token
        }
    }

    output {
        # Response text from each upload call.
        Array[String] drs_responses = post_to_drs.response_message
    }
}

# Uploads a single file to DRS with a multipart POST to `<drs_url>/ingest`,
# sending `project_id` and `dataset_id` as form fields and `token` as a bearer
# token. Returns whatever the server wrote to the response body.
task post_to_drs {
    input {
        File file_path
        String drs_url
        String project_id
        String dataset_id
        String token
    }

    # The heredoc form (`<<< >>>`) is used so that only `~{...}` is treated as a
    # WDL placeholder and shell syntax is left untouched.
    #
    # `--fail` makes curl exit non-zero on an HTTP error status (>= 400), so a
    # rejected upload fails the task instead of being reported as a successful
    # ingestion whose "response" is an error page.
    #
    # NOTE(review): `-k` disables TLS certificate verification. It is kept here
    # to preserve existing behaviour (e.g. self-signed certificates), but it
    # should be made opt-in or removed for production deployments.
    # NOTE(review): the bearer token is passed on the curl command line, so it
    # is visible in the generated script and the process list.
    command <<<
        curl -k --fail -X POST \
            -F "file=@~{file_path}" \
            -F "project_id=~{project_id}" \
            -F "dataset_id=~{dataset_id}" \
            -H "Authorization: Bearer ~{token}" \
            "~{drs_url}/ingest"
    >>>

    output {
        # Response body of the ingest request, as printed by curl on stdout.
        String response_message = read_string(stdout())
    }
}
3 changes: 2 additions & 1 deletion chord_metadata_service/experiments/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@
"file_format": {
"type": "string",
"enum": ["SAM", "BAM", "CRAM", "BAI", "CRAI", "VCF", "BCF", "MAF", "GVCF", "BigWig", "BigBed", "FASTA",
"FASTQ", "TAB", "SRA", "SRF", "SFF", "GFF", "TABIX", "UNKNOWN", "OTHER"]
"FASTQ", "TAB", "SRA", "SRF", "SFF", "GFF", "TABIX", "PDF", "CSV", "TSV", "JPEG", "PNG", "GIF",
"MARKDOWN", "MP3", "M4A", "MP4", "DOCX", "XLS", "XLSX", "UNKNOWN", "OTHER"]
},
"data_output_type": {
"type": "string",
Expand Down

0 comments on commit c24e121

Please sign in to comment.