-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #494 from kids-first/gen-workflow-output-doc-type
✨ Add Genomic Workflow Output Manifest
- Loading branch information
Showing
7 changed files
with
148 additions
and
30 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
35 changes: 35 additions & 0 deletions
35
creator/extract_configs/templates/genomic_workflow_output_manifest_config.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
""" | ||
This is an extract config intended for Genomic Workflow Output Manifests | ||
produced by the Bix team. This manifest contains the list of files produced | ||
by the genomic harmonization workflows along with the attached specimens, | ||
and source genomic files. | ||
To use this extract config, you can make a copy of it and add it to your | ||
ingest package or you can import it as a module in an existing extract config | ||
and override at least the `source_data_url`. You may also append additional | ||
operations to the `operations` list. | ||
""" | ||
|
||
from kf_lib_data_ingest.common.concept_schema import CONCEPT | ||
from kf_lib_data_ingest.etl.extract.operations import ( | ||
keep_map, value_map, Split | ||
) | ||
|
||
# Template placeholder; presumably replaced with the file's real download URL | ||
# when this config template is rendered — confirm against the renderer. | ||
source_data_url = "{{ download_url }}" | ||
|
||
# Column mappings from the manifest onto CONCEPT attributes. | ||
operations = [ | ||
# "Data Type" column -> genomic file data type. | ||
keep_map( | ||
in_col="Data Type", | ||
out_col=CONCEPT.GENOMIC_FILE.DATA_TYPE, | ||
), | ||
# "Filepath" column -> genomic file identifier. | ||
keep_map( | ||
in_col="Filepath", | ||
out_col=CONCEPT.GENOMIC_FILE.ID, | ||
), | ||
# "KF Biospecimen ID" values are split on "," and wrapped in Split, so a | ||
# cell may carry several biospecimen IDs. | ||
# NOTE(review): str.split(",") does not strip whitespace around the IDs — | ||
# confirm the manifest never has spaces after the commas. | ||
value_map( | ||
in_col="KF Biospecimen ID", | ||
m=lambda x: Split(x.split(",")), | ||
out_col=CONCEPT.BIOSPECIMEN.ID, | ||
), | ||
] |
18 changes: 18 additions & 0 deletions
18
creator/files/migrations/0022_add_genomic_workflow_output_type.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
# Generated by Django 2.2.13 on 2020-10-23 20:14 | ||
|
||
from django.db import migrations, models | ||
|
||
|
||
# Django migration that redefines the allowed choices of the `file_type` | ||
# field on the `file` model in the `files` app. | ||
class Migration(migrations.Migration): | ||
|
||
# Must be applied after the previous file-type migration in the same app. | ||
dependencies = [ | ||
('files', '0021_add_file_types'), | ||
] | ||
|
||
# Alters `file_type` to a 3-character CharField defaulting to 'OTH'. | ||
# 'GWO' is presumably the newly added choice (genomic workflow output) — | ||
# confirm against the choices declared in migration 0021. | ||
operations = [ | ||
migrations.AlterField( | ||
model_name='file', | ||
name='file_type', | ||
field=models.CharField(choices=[('OTH', 'OTH'), ('SEQ', 'SEQ'), ('SHM', 'SHM'), ('CLN', 'CLN'), ('DBG', 'DBG'), ('FAM', 'FAM'), ('S3S', 'S3S'), ('PDA', 'PDA'), ('FTR', 'FTR'), ('GWO', 'GWO')], default='OTH', max_length=3), | ||
), | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
""" | ||
This is an extract config intended for Genomic Workflow Output Manifests | ||
produced by the Bix team. This manifest contains the list of files produced | ||
by the genomic harmonization workflows along with the attached specimens, | ||
and source genomic files. | ||
To use this extract config, you can make a copy of it and add it to your | ||
ingest package or you can import it as a module in an existing extract config | ||
and override at least the `source_data_url`. You may also append additional | ||
operations to the `operations` list. | ||
""" | ||
|
||
from kf_lib_data_ingest.common.concept_schema import CONCEPT | ||
from kf_lib_data_ingest.etl.extract.operations import ( | ||
keep_map, value_map, Split | ||
) | ||
|
||
# Hard-coded download URL built from two adjacent string literals; it points | ||
# at a localhost endpoint for one specific study/file/version. | ||
# NOTE(review): looks like a fixture value for tests — confirm. | ||
source_data_url = ( | ||
'https://localhost:5002/download/study/SD_ME0WME0W/' | ||
'file/SF_Y1JMXTTT/version/FV_4RYEMD72' | ||
) | ||
# Column mappings from the manifest onto CONCEPT attributes. | ||
operations = [ | ||
# "Data Type" column -> genomic file data type. | ||
keep_map( | ||
in_col="Data Type", | ||
out_col=CONCEPT.GENOMIC_FILE.DATA_TYPE, | ||
), | ||
# "Filepath" column -> genomic file identifier. | ||
keep_map( | ||
in_col="Filepath", | ||
out_col=CONCEPT.GENOMIC_FILE.ID, | ||
), | ||
# "KF Biospecimen ID" values are split on "," and wrapped in Split, so a | ||
# cell may carry several biospecimen IDs. | ||
# NOTE(review): str.split(",") does not strip whitespace around the IDs — | ||
# confirm the manifest never has spaces after the commas. | ||
value_map( | ||
in_col="KF Biospecimen ID", | ||
m=lambda x: Split(x.split(",")), | ||
out_col=CONCEPT.BIOSPECIMEN.ID, | ||
), | ||
] |
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters