Skip to content

Commit

Permalink
Fixed hard-coded 24-region bug
Browse files (browse the repository at this point in the history)
  • Loading branch information
Julius Booth committed May 2, 2019
1 parent a764c87 commit 83b92bf
Show file tree
Hide file tree
Showing 5 changed files with 14 additions and 36 deletions.
10 changes: 6 additions & 4 deletions bin/prince
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ from prince import __version__

DEFAULT_K = 25
DEFAULT_BOOST_OUTPUT = resource_filename('prince.resources', 'training_data_w_extensions.txt')
DEFAULT_PRIMERS = resource_filename('prince.resources', 'TB_primers_extended.json')
DEFAULT_TEMPLATES = resource_filename('prince.resources', 'templates.fasta')

def main():
parser = argparse.ArgumentParser(description='Prince Options.')
Expand All @@ -21,7 +23,7 @@ def main():
help="output file for training data / training data used to predict copy numbers for queries")
parser.add_argument('-to', '--target_output', default="results/predictions.csv",
help="output file for query copy number predictions")
parser.add_argument('-tmp','--templates', default="templates.fasta",
parser.add_argument('-tmp','--templates', default=DEFAULT_TEMPLATES,
help="VNTR templates. Default is for M.TB")
parser.add_argument('-tf', '--target_file', default=None,
help="target genome names in a text file")
Expand All @@ -31,7 +33,7 @@ def main():
help="Kmer size used during read recruitment.")
parser.add_argument('-cn', '--copynumber', default=1,type=int,
help="Copy number for training genome.")
parser.add_argument('-p', '--primers', default="TB_primers_extended.json",
parser.add_argument('-p', '--primers', default=DEFAULT_PRIMERS,
help="Flanking sequences used in coverage adjustments")
parser.add_argument('-np', '--num_procs', default=1,type=int,
help="Number of cores for parallel processing.")
Expand All @@ -44,12 +46,12 @@ def main():
if prince_options.k != DEFAULT_K and prince_options.boost_output == DEFAULT_BOOST_OUTPUT:
warnings.warn("Warning: Target kmer size does not equal training settings. May lead to inaccurate predictions.")

with open(resource_filename('prince.resources', prince_options.primers)) as primers:
with open(prince_options.primers) as primers:
primers=json.load(primers)

#Template data initialized

templates = list(SeqIO.parse(resource_filename('prince.resources', prince_options.templates), "fasta"))
templates = list(SeqIO.parse(prince_options.templates, "fasta"))
templateNames = [t.id for t in templates]
templates = [str(t.seq) for t in templates]

Expand Down
6 changes: 3 additions & 3 deletions prince/match_score.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,10 +146,10 @@ def compute_match_score(query, template_obj, kmerLength, primers):

#Run reads through Fine Filtering to get score for each template
matchScore, flanking_coverage = fine_filtering(template_obj, recruitedReads, kmerLength, primers)
print(matchScore)
print(flanking_coverage)
print("VNTR Coverage: ", matchScore)
print("Flanking Coverage: ", flanking_coverage)
matchScore = [score/float(1+flanking_coverage[i]) for i,score in enumerate(matchScore)]
print(matchScore)
print("Adjusted Coverage: ", matchScore)
print("\n")
return matchScore, filename

4 changes: 2 additions & 2 deletions prince/predict.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@ def get_X_and_Y(data,template):
Y.append(cn)
return(X,Y)

def get_equations(data):
def get_equations(data, number_of_equations):
equations = []
for t in range(24):
for t in range(number_of_equations):
X,Y = get_X_and_Y(data,t)
X = np.array(X, dtype=np.float64)
Y = np.array(Y, dtype=np.float64)
Expand Down
4 changes: 3 additions & 1 deletion prince/query_sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
import multiprocessing as mp

def test_target(opts, template_obj, primers):
NUM_LOCI = len(primers)

# Get the query paths
with open(opts.target_file) as file:
queries = [line.rstrip("\n") for line in file]
Expand All @@ -18,7 +20,7 @@ def test_target(opts, template_obj, primers):

# Write results
data = get_data(opts.boost_output)
equations = get_equations(data)
equations = get_equations(data, NUM_LOCI)
with open(opts.target_output,'a+') as file:
if os.path.getsize(opts.target_output) == 0:
file.write("Templates,")
Expand Down
26 changes: 0 additions & 26 deletions prince/resources/TB_primers.json

This file was deleted.

0 comments on commit 83b92bf

Please sign in to comment.