Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: deprecation of binom_test in scipy 1.12.0 and cryptic error occurring with incorrect VCFs in cyvcf2 0.30.26 #208

Merged
merged 5 commits into from
Jan 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions trtools/dumpSTR/dumpSTR.py
Original file line number Diff line number Diff line change
Expand Up @@ -1201,6 +1201,13 @@ def main(args):
return 1
else:
raise te
except ValueError as ve:
message = ve.args[0]
if 'properly formatted' in message:
common.WARNING("Could not parse VCF.\n" + message)
return 1
else:
raise ve
if args.verbose:
common.MSG("Processing %s:%s"%(record.chrom, record.pos))
record_counter += 1
Expand Down
19 changes: 18 additions & 1 deletion trtools/qcSTR/qcSTR.py
Original file line number Diff line number Diff line change
Expand Up @@ -501,7 +501,24 @@ def main(args):

# read the vcf
numrecords = 0
for trrecord in harmonizer:
while True:
try:
trrecord = next(harmonizer)
except StopIteration: break
except TypeError as te:
message = te.args[0]
if 'missing' in message and 'mandatory' in message:
common.WARNING("Could not parse VCF.\n" + message)
return 1
else:
raise te
except ValueError as ve:
message = ve.args[0]
if 'properly formatted' in message:
common.WARNING("Could not parse VCF.\n" + message)
return 1
else:
raise ve
if args.numrecords is not None and numrecords >= args.numrecords: break
if args.period is not None and len(trrecord.motif) != args.period: continue

Expand Down
15 changes: 14 additions & 1 deletion trtools/utils/tr_harmonizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -1544,6 +1544,7 @@ class TRRecordHarmonizer:
def __init__(self, vcffile: cyvcf2.VCF, vcftype: Union[str, VcfTypes] = "auto"):
    """
    Set up the harmonizer around an open VCF.

    Parameters
    ----------
    vcffile :
        The VCF object records are read from.
    vcftype :
        Passed together with ``vcffile`` to ``InferVCFType``; the result
        is stored as ``self.vcftype``.
    """
    self.vcffile = vcffile
    self.vcftype = InferVCFType(vcffile, vcftype)
    # Record counter used by __next__ for error messages; None until
    # the first record is requested.
    self._record_idx = None

def MayHaveImpureRepeats(self) -> bool:
"""
Expand Down Expand Up @@ -1619,6 +1620,18 @@ def __iter__(self) -> Iterator[TRRecord]:

def __next__(self) -> TRRecord:
    """
    Return the next TRRecord produced from the underlying vcf.

    Raises
    ------
    StopIteration
        Propagated unchanged when the underlying VCF is exhausted.
    ValueError
        If reading the next record from the VCF fails for any other
        reason. The message names the 1-based index of the offending
        record and contains the phrase "properly formatted", which
        callers match on. The original exception is attached as
        ``__cause__``.
    """
    # Count records starting from 1 so that a failure on the very first
    # record is reported as record 1.
    if self._record_idx is None:
        self._record_idx = 0
    self._record_idx += 1
    try:
        record = next(self.vcffile)
    except StopIteration:
        # Normal end of iteration; must not be converted to ValueError.
        raise
    except Exception as exc:
        # Replace the parser's low-level error with an actionable message,
        # keeping the original exception chained for debugging.
        raise ValueError(
            "Unable to parse the "+str(self._record_idx)+"th tandem "
            "repeat in the provided VCF. Check that it is properly formatted."
        ) from exc
    return HarmonizeRecord(self.vcftype, record)

# TODO check all users of this class for new options
6 changes: 5 additions & 1 deletion trtools/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,11 @@ def GetHardyWeinbergBinomialTest(allele_freqs, genotype_counts):
if gt[1] not in allele_freqs.keys():
return np.nan
if gt[0] == gt[1]: num_hom += genotype_counts[gt]
return scipy.stats.binom_test(num_hom, n=total_samples, p=exp_hom_frac)
try:
return scipy.stats.binom_test(num_hom, n=total_samples, p=exp_hom_frac)
except AttributeError:
# binom_test was deprecated in scipy 1.10.0 and removed in 1.12.0 in favor of binomtest
return scipy.stats.binomtest(num_hom, n=total_samples, p=exp_hom_frac).pvalue

def GetHomopolymerRun(seq):
r"""Compute the maximum homopolymer run length in a sequence
Expand Down
Loading