feat: switch print to logging
maxibor committed Jul 24, 2024
1 parent 77f0d7f commit 89c55b0
Showing 7 changed files with 35 additions and 59 deletions.
4 changes: 4 additions & 0 deletions pydamage/__init__.py
@@ -1,2 +1,6 @@
__version__ = "0.80"
from pydamage.main import pydamage_analyze
import logging

logging.basicConfig(level=logging.INFO, format="%(message)s")

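Note: the logging.basicConfig(level=logging.INFO, format="%(message)s") call added here configures the root logger once at package import, so the bare logging.info()/logging.warning() calls introduced in the modules below emit plain messages (no timestamp or level prefix) to stderr. A minimal sketch of the resulting behaviour, reusing message strings from this diff (the contig name is illustrative):

import logging

# Same configuration as pydamage/__init__.py: INFO and above are shown,
# formatted as the bare message only, written to stderr.
logging.basicConfig(level=logging.INFO, format="%(message)s")

logging.info("Estimating and testing Damage")          # printed
logging.warning("Unreliable model fit for contig_1")   # printed (contig name illustrative)
logging.debug("per-read alignment details")            # suppressed (below INFO)
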
52 changes: 13 additions & 39 deletions pydamage/damage.py
@@ -3,10 +3,11 @@
import pysam
from pydamage.parse_damage import damage_al
from pydamage.model_fit import fit_models
from pydamage.contig_stats import get_contig_stats, get_ref_stats
from pydamage import models
import numpy as np
from numba import jit
from tqdm import tqdm
import logging

def sort_count_array_dict(int_array):
"""Sorts and counts unique values in an array
@@ -62,7 +63,7 @@ def get_damage(self, show_al):
self.no_mut = []
self.read_dict = {self.reference: dict()}
if self.reference is None:
iterator = tqdm(self.alignments)
iterator = tqdm(self.alignments, desc="Compute damage for entire reference")
else:
iterator = self.alignments
for al in iterator:
@@ -146,20 +147,7 @@ def compute_damage(self):
)


@jit(parallel=True)
def avg_coverage_contig(pysam_cov):
"""Computes average coverage of a reference

Args:
pysam_cov (np.array): Four dimensional array of coverage for each base
Returns:
float: mean coverage of reference
"""
return (
np.mean(
np.sum(pysam_cov, axis=0)
)
)


def check_model_fit(model_dict, wlen, verbose):
@@ -175,7 +163,7 @@ def check_model_fit(model_dict, wlen, verbose):
# Check that no fitted parameters or stdev are infinite
if np.isinf(np.array(model_dict["model_params"])).any():
if verbose:
print(f"Unreliable model fit for {model_dict['reference']}")
logging.warning(f"Unreliable model fit for {model_dict['reference']}")
return False

return model_dict
@@ -200,27 +188,11 @@ def test_damage(ref, bam, mode, wlen, g2a, subsample, show_al, process, verbose)
al_handle = pysam.AlignmentFile(bam, mode=mode, threads=process)
try:
if ref is None:
all_references = al_handle.references
print("Computing coverage")
cov = avg_coverage_contig(
np.concatenate(
[np.array(al_handle.count_coverage(contig=ref), dtype="uint16") for ref in tqdm(all_references)],
axis=1
)
)
print("Getting number of reads aligned")
nb_reads_aligned = np.sum(
[al_handle.count(contig=ref) for ref in tqdm(all_references)]
)
print("Getting reference length")
reflen = np.sum(
[al_handle.get_reference_length(ref) for ref in tqdm(all_references)]
)
logging.info("Computing alignment stats for entire reference")
reflen, cov, nb_reads_aligned = get_ref_stats(bam)
refname = "reference"
else:
cov = avg_coverage_contig(np.array(al_handle.count_coverage(contig=ref), dtype="uint16"))
nb_reads_aligned = al_handle.count(contig=ref)
reflen = al_handle.get_reference_length(ref)
reflen, cov, nb_reads_aligned = get_contig_stats(al_handle, ref)
refname = ref

if subsample:
@@ -242,6 +214,8 @@ def test_damage(ref, bam, mode, wlen, g2a, subsample, show_al, process, verbose)
all_damage,
) = al.compute_damage()

al_handle.close()

model_A = models.damage_model()
model_B = models.null_model()
test_res = fit_models(
@@ -266,15 +240,15 @@ def test_damage(ref, bam, mode, wlen, g2a, subsample, show_al, process, verbose)
GA_log[f"GtoA-{i}"] = GA_damage[i]
test_res.update(CT_log)
test_res.update(GA_log)

return (check_model_fit(test_res, wlen, verbose), read_dict)

except (ValueError, RuntimeError) as e:
if verbose:
print(f"Model fitting for {ref} failed")
print(f"Model fitting error: {e}")
print(
logging.warning(f"Model fitting for {ref} failed")
logging.warning(f"Model fitting error: {e}")
logging.warning(
f"nb_reads_aligned: {nb_reads_aligned} - coverage: {cov}"
f" - reflen: {reflen}\n"
)
al_handle.close()
return False
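
Note: the inline coverage, read-count, and reference-length computations removed above (including the numba-accelerated avg_coverage_contig) are replaced by get_contig_stats and get_ref_stats from a new pydamage.contig_stats module that is not included in this commit. A hypothetical sketch of those helpers, inferred only from how they are called here and from the updated test at the end of this diff; the function bodies are assumptions, while the names and the reflen, cov, nb_reads_aligned return order come from the diff:

import numpy as np
import pysam


def compute_coverage_sum(pysam_cov):
    # Total coverage: sum over the four nucleotide rows and all positions
    # of the array returned by pysam's count_coverage().
    return np.sum(pysam_cov)


def get_contig_stats(al_handle, ref):
    # Length, mean coverage, and number of aligned reads for a single contig.
    reflen = al_handle.get_reference_length(ref)
    cov_sum = compute_coverage_sum(
        np.array(al_handle.count_coverage(contig=ref), dtype="uint16")
    )
    nb_reads_aligned = al_handle.count(contig=ref)
    return reflen, cov_sum / reflen, nb_reads_aligned


def get_ref_stats(bam):
    # Same statistics aggregated over every reference in the BAM file;
    # takes the BAM path, matching the get_ref_stats(bam) call above.
    with pysam.AlignmentFile(bam) as alf:
        reflen = sum(alf.get_reference_length(r) for r in alf.references)
        cov_sum = sum(
            compute_coverage_sum(
                np.array(alf.count_coverage(contig=r), dtype="uint16")
            )
            for r in alf.references
        )
        nb_reads_aligned = sum(alf.count(contig=r) for r in alf.references)
    return reflen, cov_sum / reflen, nb_reads_aligned
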
10 changes: 4 additions & 6 deletions pydamage/filter.py
@@ -3,7 +3,7 @@
from pydamage.utils import df_to_csv
from pandas import read_csv
import os

import logging

def define_threshold(pydam_df, min_knee=0.5, alpha=0.05):
"""Find kneedle point in PyDamage results
@@ -32,8 +32,6 @@ def define_threshold(pydam_df, min_knee=0.5, alpha=0.05):
direction="decreasing",
online=True,
)
print(thresholds)
print(nb_contigs)
return kneedle.knee


@@ -63,13 +61,13 @@ def apply_filter(csv, threshold, outdir, alpha=0.05):
outfile = "pydamage_filtered_results.csv"
if threshold == 0:
threshold = define_threshold(df)
print(f"Optimal prediction accuracy threshold found to be: {threshold}")
logging.info(f"Optimal prediction accuracy threshold found to be: {threshold}")
filt_df = filter_pydamage_results(df, acc_thresh=threshold)
print(
logging.info(
f"Filtering PyDamage results with qvalue <= {alpha} and predicted_accuracy >= {threshold}"
)
if not os.path.exists(outdir):
os.mkdir(outdir)
df_to_csv(filt_df, outdir, outfile)
print(f"Filtered PyDamage results written to {outdir}/{outfile}")
logging.info(f"Filtered PyDamage results written to {outdir}/{outfile}")
return filt_df
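
Note on define_threshold above: the kneedle search is presumably the kneed package's KneeLocator (only its closing arguments and the kneedle.knee attribute appear in the hunk), applied to a grid of predicted_accuracy cutoffs (x) against the number of contigs retained at each cutoff (y). A standalone illustration with made-up data; curve="convex" and the construction of thresholds/nb_contigs are assumptions, since they fall outside the shown hunk:

import numpy as np
from kneed import KneeLocator

thresholds = np.arange(0.5, 1.0, 0.05)  # candidate predicted_accuracy cutoffs
nb_contigs = np.array([100, 60, 40, 28, 20, 15, 11, 8, 6, 5])  # contigs kept at each cutoff

kneedle = KneeLocator(
    thresholds,
    nb_contigs,
    curve="convex",          # assumption: retention decays convexly with the cutoff
    direction="decreasing",
    online=True,
)
print(kneedle.knee)  # accuracy value at the knee, i.e. what define_threshold returns
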
17 changes: 9 additions & 8 deletions pydamage/main.py
@@ -1,5 +1,4 @@
#!/usr/bin/env python3
import pysam
from pydamage import utils
import multiprocessing
from functools import partial
@@ -10,11 +9,11 @@
from pydamage.rescale import rescale_bam
from pydamage.models import glm_model_params
import os
import sys
from tqdm import tqdm
import warnings
from pydamage import __version__
from collections import ChainMap
import logging

def pydamage_analyze(
bam,
@@ -58,7 +57,7 @@ def pydamage_analyze(
raise ValueError("Cannot use subsample and rescale together")

if verbose:
print(f"Pydamage version {__version__}\n")
logging.info(f"Pydamage version {__version__}\n")
utils.makedir(outdir, force=force)

refs, mode = utils.prepare_bam(bam, minlen=minlen)
@@ -98,7 +97,7 @@ def pydamage_analyze(
verbose=verbose,
)
if group:
print("Estimating and testing Damage")
logging.info("Estimating and testing Damage")
filt_res, read_dict = damage.test_damage(
ref=None,
bam=bam,
@@ -133,14 +132,15 @@ def pydamage_analyze(
######################
######################

print(f"{len(filt_res)} contig(s) analyzed by Pydamage")
if not group:
logging.info(f"{len(filt_res)} contig(s) analyzed by Pydamage")
if len(filt_res) == 0:
warnings.warn(
"No alignments were found, check your alignment file", PyDamageWarning
)

if plot and len(filt_res) > 0:
print("\nGenerating Pydamage plots")
logging.info("Generating Pydamage plots")
plotdir = f"{outdir}/plots"
utils.makedir(plotdir, confirm=False)

@@ -153,7 +153,8 @@ def pydamage_analyze(
df_glm = glm_predict(prep_df_glm, glm_model_params)

df = df_glm.merge(df_pydamage, left_index=True, right_index=True)

utils.df_to_csv(df, outdir)

if rescale:
rescale_bam(
bam=bam,
Expand All @@ -165,5 +166,5 @@ def pydamage_analyze(
outname=os.path.join(outdir, "pydamage_rescaled.bam"),
threads=process,
)
utils.df_to_csv(df, outdir)

return df
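
With the root logger configured in __init__.py, the progress messages of pydamage_analyze above now go through logging rather than print. A hedged usage sketch of the public entry point: only bam as the first argument is certain from this diff; outdir, group, plot, rescale, process, and verbose appear as names in the function body, and treating them as keyword arguments with these values is an assumption.

from pydamage import pydamage_analyze

# Hypothetical invocation; the file name is illustrative, and the BAM must be
# sorted and indexed (see the prepare_bam check in pydamage/utils.py below).
df = pydamage_analyze(
    "aligned.sorted.bam",
    outdir="pydamage_results",
    group=False,     # per-contig damage estimation rather than whole-reference
    plot=True,
    rescale=False,
    process=4,
    verbose=True,
)
print(df.head())     # merged GLM predictions and per-contig damage statistics
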
1 change: 0 additions & 1 deletion pydamage/rescale.py
@@ -86,7 +86,6 @@ def rescale_bam(
"CL": " ".join(sys.argv),
}
)
print(hd)
refs = al.references
with pysam.AlignmentFile(outname, "wb", threads=threads, header=hd) as out:
for ref in tqdm(refs, desc="Rescaling quality scores"):
6 changes: 3 additions & 3 deletions pydamage/utils.py
@@ -6,7 +6,7 @@
import pandas as pd
from typing import Tuple
from pysam import AlignmentFile

import logging

def check_extension(filename: str) -> str:
"""Check alignment file format to give correct open mode
@@ -38,7 +38,7 @@ def makedir(dirpath: str, confirm: bool = True, force: bool = False):
"""
if os.path.exists(dirpath):
if confirm and force is False:
print(
logging.warning(
f"Result directory, {dirpath}, already exists, it will be overwritten"
)
if input("Do You Want To Continue? (y|n) ").lower() != "y":
@@ -240,7 +240,7 @@ def prepare_bam(bam: str, minlen: int) -> Tuple[Tuple, str]:
alf = AlignmentFile(bam, mode)

if not alf.has_index():
print(
logging.error(
f"BAM file {bam} has no index. Sort BAM file and provide index "
"before running pydamage."
)
4 changes: 2 additions & 2 deletions tests/test_damage.py
@@ -26,10 +26,10 @@ def test_al_to_damage(bamfile):


def test_avg_coverage():
from pydamage.damage import avg_coverage_contig
from pydamage.contig_stats import compute_coverage_sum

cov = np.array([[1, 4, 5, 3], [2, 5, 6, 7], [5, 6, 3, 1], [2, 4, 8, 1]], np.int32)
assert avg_coverage_contig(cov) == 15.75
assert compute_coverage_sum(cov) == 63


def test_check_model_fit():
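
The updated assertion is numerically consistent with the old one: the 4 x 4 toy coverage matrix sums to 63 overall, and the removed avg_coverage_contig returned the mean of the per-position sums, 63 / 4 = 15.75. A quick sanity check, assuming compute_coverage_sum is simply the total sum of the count_coverage array:

import numpy as np

cov = np.array([[1, 4, 5, 3], [2, 5, 6, 7], [5, 6, 3, 1], [2, 4, 8, 1]], np.int32)

assert cov.sum() == 63                        # new compute_coverage_sum behaviour (assumed: total sum)
assert np.mean(np.sum(cov, axis=0)) == 15.75  # old avg_coverage_contig: mean of per-position sums
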
