Skip to content

Commit

Permalink
smaller fixes to the summary.py
Browse files Browse the repository at this point in the history
  • Loading branch information
npbhavya committed Aug 1, 2024
1 parent e097fb9 commit 01940aa
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 35 deletions.
30 changes: 11 additions & 19 deletions sphae/workflow/rules/3.qc_qa.smk
Original file line number Diff line number Diff line change
Expand Up @@ -71,25 +71,21 @@ rule run_seqkit_short:
threads:
config['resources']['smalljob']['cpu'],
log:
os.path.join(dir_log, "seqkit", "{sample}_seqkit_short.log"),
os.path.join(dir_log, "seqkit", "{sample}_bases_short.log"),
shell:
"""
if [[ -s input.r1 ]]; then
seqkit stats {input.r1} -T > {params.r1_temp}
seqkit stats {input.r2} -T > {params.r2_temp}
seqkit stats {input.r1} -T > {params.r1_temp}
seqkit stats {input.r2} -T > {params.r2_temp}
# Extract numeric values from the second line of the output files
value1=$(awk 'NR==2 {{print $5}}' {params.r1_temp})
value2=$(awk 'NR==2 {{print $5}}' {params.r2_temp})
# Extract numeric values from the second line of the output files
value1=$(awk 'NR==2 {{print $5}}' {params.r1_temp})
value2=$(awk 'NR==2 {{print $5}}' {params.r2_temp})
# Add the values
sum=$((value1 + value2))
# Add the values
sum=$((value1 + value2))
# Write the sum to output.r
echo $sum > {output.r}
else
echo 0 > {output.r}
fi
# Write the sum to output.r
echo $sum > {output.r}
"""

rule filtlong_long:
Expand Down Expand Up @@ -135,9 +131,5 @@ rule run_seqkit_long:
os.path.join(dir_log, "{sample}_long_seqkit.log"),
shell:
"""
if [[ -s {input.fastq} ]] ; then
seqkit stats {input.fastq} -T -N 50 -N 90 | awk 'NR==2 {{print $5}}' > {output.r}
fi
echo 0 > {output.r}
seqkit stats {input.fastq} -T -N 50 -N 90 | awk 'NR==2 {{print $5}}' > {output.r}
"""
7 changes: 5 additions & 2 deletions sphae/workflow/scripts/pick_phage_contigs.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ def picking_contigs(file,out):
datav = data[data["Length_x"] > 1000]
datav = datav[datav["Prediction"] == "Virus"]
datac = datav[datav["completeness"]> 70.00]
print (len(data))
print (data)
#print (len(data))
#print (datac)
else:
open(out, 'a').close()
return None
Expand All @@ -38,12 +38,15 @@ def picking_contigs(file,out):
#print ("entering this if statement")
if (datac["Connections"] == 0).any():
print("The genome is assembled, yay!")
#print("writing to", out)
datac.to_csv(out, index=False)
else:
print ("The genome has some regions that are frgamented, but mostly assembled")
print ("Take a look at the assembly graph file in bandage for more information on where the genome is fragmented")
datac.to_csv(out, index=False)

with open(out, 'a'):
os.utime(out, None)

picking_contigs(snakemake.input.csv, snakemake.output.out)

Expand Down
38 changes: 24 additions & 14 deletions sphae/workflow/scripts/summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,27 +56,23 @@ def count_hypothetical_proteins(gbk_file):
count += 1
return count

def get_num(params):
length=(input_files['read'])
sum_len_value = length[0]

return sum_len_value

def write_single_genome_summary(input_files, summary):
with open(input_files['table'], 'r') as table_file:
lines = table_file.readlines()
if len(lines) > 1: # Ensure there is at least one data line
fields = lines[1].strip().split(',') # Split the second line into fields

read_length = get_num(params)
summary.write(f"Total length of reads after QC and subsampling:{read_length}\n")
filename = input_files['read']
with open(filename, 'r') as file:
contents = file.read()
summary.write(f"Total length of reads after QC and subsampling: {contents}\n")

# Ensure fields list has enough elements to access the desired indices
if len(fields) >= 23:
summary.write(f"Length: {lines[1].split(',')[2]}\n")

if fields[3].strip() == 'True':
summary.write("Circular: True\t")
summary.write("Circular: True\n")
else:
summary.write("Circular: False\n")

Expand Down Expand Up @@ -179,9 +175,9 @@ def write_single_genome_summary(input_files, summary):

#CRISPR spacers
if (len(open(input_files['spacers']).readlines()) == 0) and (len(open(input_files['acr']).readlines()) == 0):
summary.write("No CRISPR spacers or anti CRISPR spacers found\n")
summary.write("No anti CRISPR spacers found\n")
elif (len(open(input_files['spacers']).readlines()) != 0):
summary.write("CRISPR spacers found\n")
summary.write("anti-CRISPR spacers found\n")
shutil.copy(input_files['spacers'], outdir)
elif (len(open(input_files['acr']).readlines()) != 0):
summary.write("anti-CRISPR spacers found\n")
Expand All @@ -197,8 +193,10 @@ def write_single_genome_summary(input_files, summary):
def write_multiple_genome_summary(input_files, summary):
summary.write("Multiple phages assembled from this sample\n")
summary.write("Their characteristics are:\n")
read_length = get_num(params)
summary.write(f"Total length of reads after QC and subsampling:{read_length}\n")
filename = input_files['read']
with open(filename, 'r') as file:
contents = file.read()
summary.write(f"Total length of reads after QC and subsampling: {contents}\n")
summary.write("\n\n")

with open(input_files['table'], 'r') as table_file:
Expand Down Expand Up @@ -336,7 +334,7 @@ def write_multiple_genome_summary(input_files, summary):
out_spacers = f"{outdir}/{samplenames}_pharokka_crispr.tsv"
out_acr = f"{outdir}/{samplenames}_phold_acr.tsv"
if spacers_lines_count == 0 and phold_lines_count == 0:
summary.write("No CRISPR spacers or anti CRISPR spacers found\n")
summary.write("No anti CRISPR spacers found\n")
elif spacers_lines_count != 0:
summary.write("anti-CRISPR spacers found\n")
shutil.copy(spacers_pattern, out_spacers)
Expand Down Expand Up @@ -381,14 +379,26 @@ def analyze_assembly(input_files, output_summary, params):
elif line_count == 1 :
with open(output_summary, 'w') as summary:
summary.write(f"Sample: {params['sample']}\n")
filename = input_files['read']
with open(filename, 'r') as file:
contents = file.read()
summary.write(f"Total length of reads after QC and subsampling: {contents}\n")
summary.write("No contigs from the assembly were assigned viral, likely contigs too short in size\n")
else:
with open(output_summary, 'w') as summary:
summary.write(f"Sample: {params['sample']}\n")
filename = input_files['read']
with open(filename, 'r') as file:
contents = file.read()
summary.write(f"Total length of reads after QC and subsampling: {contents}\n")
summary.write("No contigs from the assembly were assigned viral, likely contigs too short in size\n")
else:
with open(output_summary, 'w') as summary:
summary.write(f"Sample: {params['sample']}\n")
filename = input_files['read']
with open(filename, 'r') as file:
contents = file.read()
summary.write(f"Total length of reads after QC and subsampling: {contents}\n")
summary.write("Failed during assembly\n")

# Replace input_files and output_params with the actual paths to your input/output files and parameters
Expand Down

0 comments on commit 01940aa

Please sign in to comment.