-
Notifications
You must be signed in to change notification settings - Fork 24
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* initial pf8 gcs PR ready Pf8() PR ready Pf8() * file path change * s3 access rewired via fsspec * fixes the check for annotation file * removes extra path * altlen commented in test units * missed fix for altlen * fixes for PR feedbacks * fix-2 for PR feedbacks * fix for syntax error * chained urls into s3 fs * make handling of chained URLs consistent between GCS and S3 * fix bug --------- Co-authored-by: Alistair Miles <alimanfoo@googlemail.com>
- Loading branch information
Showing
7 changed files
with
542 additions
and
13 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
import os | ||
|
||
from .plasmodium import PlasmodiumDataResource | ||
|
||
|
||
class Pf8(PlasmodiumDataResource):
    """Entry point for working with the Pf8 data release.

    Parameters
    ----------
    url : str, optional
        Base location of the data. When omitted, the base class is left to
        choose the location (the Pf8 config shipped with this package names
        "gs://pf8-release/" on Google Cloud Storage as the default). A local
        directory can be given instead if the release has been downloaded.
    data_config : str, optional
        Path to the JSON file describing how the Pf8 data resource is laid
        out. Any falsy value selects "pf8_config.json" located next to this
        module.
    **kwargs
        Accepted by the signature but not forwarded by this constructor.
        NOTE(review): earlier documentation described these as being passed
        to fsspec for file system setup -- confirm whether the base class
        should receive them.

    Examples
    --------
    Use the default (cloud-hosted) location:

    >>> import malariagen_data
    >>> pf8 = malariagen_data.Pf8()

    Use a copy of the release stored on the local file system:

    >>> pf8 = malariagen_data.Pf8("/local/path/to/pf8-release/")

    """

    def __init__(self, url=None, data_config=None, **kwargs):
        # The packaged config lives in the same directory as this module.
        bundled_config = os.path.join(
            os.path.dirname(os.path.abspath(__file__)),
            "pf8_config.json",
        )
        # A caller-supplied config wins; otherwise fall back to the bundled one.
        super().__init__(data_config=data_config or bundled_config, url=url)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,118 @@ | ||
{ | ||
"default_url": "gs://pf8-release/", | ||
"metadata_path": "metadata/Pf8_samples.txt", | ||
"reference_path": "reference/PlasmoDB-54-Pfalciparum3D7-Genome.zarr/", | ||
"reference_contigs": [ | ||
"Pf3D7_01_v3", | ||
"Pf3D7_02_v3", | ||
"Pf3D7_03_v3", | ||
"Pf3D7_04_v3", | ||
"Pf3D7_05_v3", | ||
"Pf3D7_06_v3", | ||
"Pf3D7_07_v3", | ||
"Pf3D7_08_v3", | ||
"Pf3D7_09_v3", | ||
"Pf3D7_10_v3", | ||
"Pf3D7_11_v3", | ||
"Pf3D7_12_v3", | ||
"Pf3D7_13_v3", | ||
"Pf3D7_14_v3", | ||
"Pf3D7_API_v3", | ||
"Pf3D7_MIT_v3" | ||
], | ||
"annotations_path": "annotations/PlasmoDB-55_Pfalciparum3D7.gff.gz", | ||
"variant_calls_zarr_path": "zarr/", | ||
"default_variant_variables": { | ||
"FILTER_PASS": ["variants"], | ||
"is_snp": ["variants"], | ||
"numalt": ["variants"], | ||
"CDS": ["variants"] | ||
}, | ||
"extended_calldata_variables": { | ||
"DP": ["variants", "samples"], | ||
"GQ": ["variants", "samples"], | ||
"MIN_DP": ["variants", "samples"], | ||
"PGT": ["variants", "samples"], | ||
"PID": ["variants", "samples"], | ||
"PS": ["variants", "samples"], | ||
"RGQ": ["variants", "samples"], | ||
"PL": ["variants", "samples", "genotypes"], | ||
"SB": ["variants", "samples", "sb_statistics"] | ||
}, | ||
"extended_variant_fields": { | ||
"AC": ["variants", "alt_alleles"], | ||
"AF": ["variants", "alt_alleles"], | ||
"AN": ["variants"], | ||
"ANN_AA_length": ["variants", "alt_alleles"], | ||
"ANN_AA_pos": ["variants", "alt_alleles"], | ||
"ANN_Allele": ["variants", "alt_alleles"], | ||
"ANN_Annotation": ["variants", "alt_alleles"], | ||
"ANN_Annotation_Impact": ["variants", "alt_alleles"], | ||
"ANN_CDS_length": ["variants", "alt_alleles"], | ||
"ANN_CDS_pos": ["variants", "alt_alleles"], | ||
"ANN_Distance": ["variants", "alt_alleles"], | ||
"ANN_Feature_ID": ["variants", "alt_alleles"], | ||
"ANN_Feature_Type": ["variants", "alt_alleles"], | ||
"ANN_Gene_ID": ["variants", "alt_alleles"], | ||
"ANN_Gene_Name": ["variants", "alt_alleles"], | ||
"ANN_HGVS_c": ["variants", "alt_alleles"], | ||
"ANN_HGVS_p": ["variants", "alt_alleles"], | ||
"ANN_Rank": ["variants", "alt_alleles"], | ||
"ANN_Transcript_BioType": ["variants", "alt_alleles"], | ||
"ANN_cDNA_length": ["variants", "alt_alleles"], | ||
"ANN_cDNA_pos": ["variants", "alt_alleles"], | ||
"AS_BaseQRankSum": ["variants", "alt_alleles"], | ||
"AS_FS": ["variants", "alt_alleles"], | ||
"AS_InbreedingCoeff": ["variants", "alt_alleles"], | ||
"AS_MQ": ["variants", "alt_alleles"], | ||
"AS_MQRankSum": ["variants", "alt_alleles"], | ||
"AS_QD": ["variants", "alt_alleles"], | ||
"AS_ReadPosRankSum": ["variants", "alt_alleles"], | ||
"AS_SOR": ["variants", "alt_alleles"], | ||
"BaseQRankSum": ["variants"], | ||
"DP": ["variants"], | ||
"DS": ["variants"], | ||
"END": ["variants"], | ||
"ExcessHet": ["variants"], | ||
"FILTER_Apicoplast": ["variants"], | ||
"FILTER_Centromere": ["variants"], | ||
"FILTER_InternalHypervariable": ["variants"], | ||
"FILTER_LowQual": ["variants"], | ||
"FILTER_Low_VQSLOD": ["variants"], | ||
"FILTER_Mitochondrion": ["variants"], | ||
"FILTER_SubtelomericHypervariable": ["variants"], | ||
"FILTER_SubtelomericRepeat": ["variants"], | ||
"FILTER_VQSRTrancheINDEL99.50to99.60": ["variants"], | ||
"FILTER_VQSRTrancheINDEL99.60to99.80": ["variants"], | ||
"FILTER_VQSRTrancheINDEL99.80to99.90": ["variants"], | ||
"FILTER_VQSRTrancheINDEL99.90to99.95": ["variants"], | ||
"FILTER_VQSRTrancheINDEL99.95to100.00+": ["variants"], | ||
"FILTER_VQSRTrancheINDEL99.95to100.00": ["variants"], | ||
"FILTER_VQSRTrancheSNP99.50to99.60": ["variants"], | ||
"FILTER_VQSRTrancheSNP99.60to99.80": ["variants"], | ||
"FILTER_VQSRTrancheSNP99.80to99.90": ["variants"], | ||
"FILTER_VQSRTrancheSNP99.90to99.95": ["variants"], | ||
"FILTER_VQSRTrancheSNP99.95to100.00+": ["variants"], | ||
"FILTER_VQSRTrancheSNP99.95to100.00": ["variants"], | ||
"FS": ["variants"], | ||
"ID": ["variants"], | ||
"InbreedingCoeff": ["variants"], | ||
"LOF": ["variants"], | ||
"MLEAC": ["variants", "alt_alleles"], | ||
"MLEAF": ["variants", "alt_alleles"], | ||
"MQ": ["variants"], | ||
"MQRankSum": ["variants"], | ||
"NEGATIVE_TRAIN_SITE": ["variants"], | ||
"NMD": ["variants"], | ||
"POSITIVE_TRAIN_SITE": ["variants"], | ||
"QD": ["variants"], | ||
"QUAL": ["variants"], | ||
"RAW_MQandDP": ["variants", "ploidy"], | ||
"ReadPosRankSum": ["variants"], | ||
"RegionType": ["variants"], | ||
"SOR": ["variants"], | ||
"VQSLOD": ["variants"], | ||
"culprit": ["variants"], | ||
"set": ["variants"] | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.