1616from sgkit .model import DIM_VARIANT , create_genotype_call_dataset
1717from sgkit .typing import PathType
1818
19- DEFAULT_ALT_NUMBER = 3 # see vcf_read.py in scikit_allel
19+ DEFAULT_MAX_ALT_ALLELES = (
20+ 3 # equivalent to DEFAULT_ALT_NUMBER in vcf_read.py in scikit_allel
21+ )
2022
2123
2224@contextmanager
@@ -58,13 +60,13 @@ def vcf_to_zarr_sequential(
5860 ploidy : int = 2 ,
5961 mixed_ploidy : bool = False ,
6062 truncate_calls : bool = False ,
61- alt_number : int = DEFAULT_ALT_NUMBER ,
63+ max_alt_alleles : int = DEFAULT_MAX_ALT_ALLELES ,
6264) -> None :
6365
6466 with open_vcf (input ) as vcf :
6567 sample_id = np .array (vcf .samples , dtype = str )
6668 n_sample = len (sample_id )
67- n_allele = alt_number + 1
69+ n_allele = max_alt_alleles + 1
6870
6971 variant_contig_names = vcf .seqnames
7072
@@ -188,7 +190,7 @@ def vcf_to_zarr_parallel(
188190 ploidy : int = 2 ,
189191 mixed_ploidy : bool = False ,
190192 truncate_calls : bool = False ,
191- alt_number : int = DEFAULT_ALT_NUMBER ,
193+ max_alt_alleles : int = DEFAULT_MAX_ALT_ALLELES ,
192194) -> None :
193195 """Convert specified regions of one or more VCF files to zarr files, then concat, rechunk, write to zarr"""
194196
@@ -209,7 +211,7 @@ def vcf_to_zarr_parallel(
209211 ploidy = ploidy ,
210212 mixed_ploidy = mixed_ploidy ,
211213 truncate_calls = truncate_calls ,
212- alt_number = alt_number ,
214+ max_alt_alleles = max_alt_alleles ,
213215 )
214216
215217 ds = zarrs_to_dataset (paths , chunk_length , chunk_width , tempdir_storage_options )
@@ -229,7 +231,7 @@ def vcf_to_zarrs(
229231 ploidy : int = 2 ,
230232 mixed_ploidy : bool = False ,
231233 truncate_calls : bool = False ,
232- alt_number : int = DEFAULT_ALT_NUMBER ,
234+ max_alt_alleles : int = DEFAULT_MAX_ALT_ALLELES ,
233235) -> Sequence [str ]:
234236 """Convert VCF files to multiple Zarr on-disk stores, one per region.
235237
@@ -262,7 +264,7 @@ def vcf_to_zarrs(
262264 If True, genotype calls with more alleles than the specified (maximum) ploidy value
263265 will be truncated to size ploidy. If False, calls with more alleles than the
264266 specified ploidy will raise an exception.
265- alt_number
267+ max_alt_alleles
266268 The (maximum) number of alternate alleles in the VCF file. Any records with more than
267269 this number of alternate alleles will have the extra alleles dropped.
268270
@@ -313,7 +315,7 @@ def vcf_to_zarrs(
313315 ploidy = ploidy ,
314316 mixed_ploidy = mixed_ploidy ,
315317 truncate_calls = truncate_calls ,
316- alt_number = alt_number ,
318+ max_alt_alleles = max_alt_alleles ,
317319 )
318320 tasks .append (task )
319321 dask .compute (* tasks )
@@ -334,7 +336,7 @@ def vcf_to_zarr(
334336 ploidy : int = 2 ,
335337 mixed_ploidy : bool = False ,
336338 truncate_calls : bool = False ,
337- alt_number : int = DEFAULT_ALT_NUMBER ,
339+ max_alt_alleles : int = DEFAULT_MAX_ALT_ALLELES ,
338340) -> None :
339341 """Convert VCF files to a single Zarr on-disk store.
340342
@@ -393,7 +395,7 @@ def vcf_to_zarr(
393395 If True, genotype calls with more alleles than the specified (maximum) ploidy value
394396 will be truncated to size ploidy. If False, calls with more alleles than the
395397 specified ploidy will raise an exception.
396- alt_number
398+ max_alt_alleles
397399 The (maximum) number of alternate alleles in the VCF file. Any records with more than
398400 this number of alternate alleles will have the extra alleles dropped.
399401 """
@@ -437,7 +439,7 @@ def vcf_to_zarr(
437439 ploidy = ploidy ,
438440 mixed_ploidy = mixed_ploidy ,
439441 truncate_calls = truncate_calls ,
440- alt_number = alt_number ,
442+ max_alt_alleles = max_alt_alleles ,
441443 )
442444
443445
0 commit comments