Skip to content

Commit

Permalink
vsnp3 version 3.24
Browse files Browse the repository at this point in the history
  • Loading branch information
stuber committed Aug 22, 2024
1 parent f461495 commit 69745d0
Show file tree
Hide file tree
Showing 27 changed files with 86 additions and 43 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ Tested with Python 3.8 - 3.9.
Anaconda [setup](./docs/instructions/conda_instructions.md)

```
conda create -c conda-forge -c bioconda -n vsnp3 vsnp3=3.23
conda create -c conda-forge -c bioconda -n vsnp3 vsnp3=3.24
```

## Installation test
Expand Down
13 changes: 7 additions & 6 deletions bin/vsnp3_alignment_vcf.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python

__version__ = "3.23"
__version__ = "3.24"

import os
import subprocess
Expand Down Expand Up @@ -336,7 +336,7 @@ def latex(self, tex, groups=None):
print(r'\end{adjustbox}', file=tex)
print(r'\begin{adjustbox}{width=1\textwidth}', file=tex)

print(r'\begin{tabular}{ l | l | l | l | l | l }', file=tex)
print(r'\begin{tabular}{ l | l | l | l | l | l | l }', file=tex)
print(r'Mapped Paired Reads & Mapped Single Reads & Unmapped Reads & Unmapped Percent & \multicolumn{2}{l}{Unmapped Assembled Contigs} \\', file=tex)
print(r'\hline', file=tex)
mapped_reads = self.READS_PAIRED + self.READS_SINGLE
Expand All @@ -346,16 +346,16 @@ def latex(self, tex, groups=None):
print(r'\hline', file=tex)
print(r'\hline', file=tex)

print(r'Duplicate Paired Reads & Duplicate Single Reads & \multicolumn{4}{l}{Duplicate Percent of Mapped Reads} \\', file=tex)
print(r'Duplicate Paired Reads & Duplicate Single Reads & \multicolumn{5}{l}{Duplicate Percent of Mapped Reads} \\', file=tex)
print(r'\hline', file=tex)
print(f'{self.DUPLICATE_PAIR:,} & {self.DUPLICATE_SINGLE:,} & ' + r'\multicolumn{4}{l}{' + f'{(self.DUPLICATION_RATIO*100):,.1f}' + r'\%} \\', file=tex)
print(f'{self.DUPLICATE_PAIR:,} & {self.DUPLICATE_SINGLE:,} & ' + r'\multicolumn{5}{l}{' + f'{(self.DUPLICATION_RATIO*100):,.1f}' + r'\%} \\', file=tex)
print(r'\hline', file=tex)
print(r'\hline', file=tex)

print(f'BAM File & Reference Length & Genome with Coverage & Average Depth & No Coverage Bases & Quality SNPs \\\\', file=tex)
print(f'BAM File & Reference Length & Genome with Coverage & Average Depth & No Coverage Bases & Ambiguous SNPs & Quality SNPs \\\\', file=tex)
print(r'\hline', file=tex)
bam = self.zero_coverage.bam.replace('_', '\_')
print(f'{bam} & {self.zero_coverage.reference_length:,} & {(self.zero_coverage.genome_coverage*100):,.2f}\% & {self.zero_coverage.ave_coverage:,.1f}X & {self.zero_coverage.total_zero_coverage:,} & {self.zero_coverage.good_snp_count:,} \\\\', file=tex)
print(f'{bam} & {self.zero_coverage.reference_length:,} & {(self.zero_coverage.genome_coverage*100):,.2f}\% & {self.zero_coverage.ave_coverage:,.1f}X & {self.zero_coverage.total_zero_coverage:,} & {self.zero_coverage.ac1_count:,} & {self.zero_coverage.good_snp_count:,} \\\\', file=tex)
print(r'\hline', file=tex)

if groups:
Expand Down Expand Up @@ -386,6 +386,7 @@ def excel(self, excel_dict):
excel_dict['Average Depth'] = f'{self.zero_coverage.ave_coverage:,.1f}X'
excel_dict['No Coverage Bases'] = f'{self.zero_coverage.total_zero_coverage:,}'
excel_dict['Percent Ref with Zero Coverage'] = f'{self.zero_coverage.percent_ref_with_zero_coverage:,.6f}%'
excel_dict['Ambiguous SNPs'] = f'{self.zero_coverage.ac1_count:,}'
excel_dict['Quality SNPs'] = f'{self.zero_coverage.good_snp_count:,}'


Expand Down
2 changes: 1 addition & 1 deletion bin/vsnp3_annotation.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python

__version__ = "3.23"
__version__ = "3.24"

import os
import shutil
Expand Down
2 changes: 1 addition & 1 deletion bin/vsnp3_assembly.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python

__version__ = "3.23"
__version__ = "3.24"

import os
import sys
Expand Down
2 changes: 1 addition & 1 deletion bin/vsnp3_best_reference_sourmash.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python

__version__ = "3.23"
__version__ = "3.24"

import os
import subprocess
Expand Down
2 changes: 1 addition & 1 deletion bin/vsnp3_bruc_mlst.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python

__version__ = "3.23"
__version__ = "3.24"

import os
import io
Expand Down
2 changes: 1 addition & 1 deletion bin/vsnp3_download_GCA_fasta_get_metadata.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python

__version__ = "3.23"
__version__ = "3.24"

import os
import sys
Expand Down
2 changes: 1 addition & 1 deletion bin/vsnp3_download_fasta_gbk_gff_by_acc.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python

__version__ = "3.23"
__version__ = "3.24"

import os
import argparse
Expand Down
2 changes: 1 addition & 1 deletion bin/vsnp3_excel_merge_defining_snps.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python

__version__ = "3.23"
__version__ = "3.24"

import os
import re
Expand Down
2 changes: 1 addition & 1 deletion bin/vsnp3_fasta_to_fastq.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python3

__version__ = "3.23"
__version__ = "3.24"

import gzip
import os
Expand Down
5 changes: 3 additions & 2 deletions bin/vsnp3_fasta_to_snps_table.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python

__version__ = "3.23"
__version__ = "3.24"

import os
import subprocess
Expand Down Expand Up @@ -312,7 +312,8 @@ def excel_formatter(self, df_json, write_to, group=None):
# sample_path_name = self.sample_path_name
st = self.st
table_df = pd.read_json(df_json, orient='split')
writer = pd.ExcelWriter(write_to, engine='xlsxwriter')
writer = pd.ExcelWriter(write_to, engine='xlsxwriter',)
writer.book.use_zip64()
table_df.to_excel(writer, sheet_name='Sheet1')
wb = writer.book
ws = writer.sheets['Sheet1']
Expand Down
2 changes: 1 addition & 1 deletion bin/vsnp3_fastq_stats_seqkit.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python

__version__ = "3.23"
__version__ = "3.24"

import os
import subprocess
Expand Down
2 changes: 1 addition & 1 deletion bin/vsnp3_file_setup.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python

__version__ = "3.23"
__version__ = "3.24"

import os
import shutil
Expand Down
7 changes: 5 additions & 2 deletions bin/vsnp3_group_on_defining_snps.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python

__version__ = "3.23"
__version__ = "3.24"

import os
import sys
Expand Down Expand Up @@ -370,8 +370,11 @@ def __init__(self, cwd=None, metadata=None, excel_remove=None, gbk_list=None, de
# print(f'\n\nTotal Time: {datetime.now() - self.beginTime}\n')

# Add back those samples for which a group was not found
if 'Group Not Found' not in groupings_dict:
groupings_dict['Group Not Found'] = {}

for sample in samples_without_group_set:
groupings_dict = {**groupings_dict, 'Group Not Found': {sample: None}}
groupings_dict['Group Not Found'][sample] = pd.DataFrame()
self.groupings_dict = groupings_dict # will be passed to html summary

def group_selection(self, abs_pos):
Expand Down
20 changes: 16 additions & 4 deletions bin/vsnp3_group_reporter.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python

__version__ = "3.23"
__version__ = "3.24"

import os
import io
Expand Down Expand Up @@ -100,6 +100,7 @@ def bin_and_html_table(self, filename, found_positions, found_positions_mix):
defining_snps = self.defining_snps
inverted_defining_snps = self.inverted_defining_snps
try:
sample_groups_list = []
defining_snp = False
for abs_position in list(defining_snps.keys() & (found_positions.keys() | found_positions_mix.keys())): #absolute positions in set union of two list
group = defining_snps[abs_position]
Expand All @@ -112,10 +113,21 @@ def bin_and_html_table(self, filename, found_positions, found_positions_mix):
group = inverted_defining_snps[abs_position]
sample_groups_list.append(group)
defining_snp = True
if defining_snp:
sample_groups_list = sorted(sample_groups_list)
else:

if defining_snp is False: # extra step to get the group name when there are multiple defining snps for a group.
for abs_position in list(defining_snps.keys()):
set_abs_position = set(abs_position.split(", "))
set_found_positions = set(found_positions.keys())
is_subset = set_abs_position.issubset(set_found_positions)
if is_subset:
group = defining_snps[abs_position]
sample_groups_list.append(group)

if len(sample_groups_list) == 0:
sample_groups_list = ['No defining SNPs']
else:
sample_groups_list = sorted(sample_groups_list)

except TypeError:
message = f'File TypeError'
print(f'{message}: {filename}')
Expand Down
2 changes: 1 addition & 1 deletion bin/vsnp3_html_step2_summary.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python

__version__ = "3.23"
__version__ = "3.24"

import os

Expand Down
2 changes: 1 addition & 1 deletion bin/vsnp3_kernel_plots.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python

__version__ = "3.23"
__version__ = "3.24"

import os
import re
Expand Down
2 changes: 1 addition & 1 deletion bin/vsnp3_path_adder.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python

__version__ = "3.23"
__version__ = "3.24"

import os
import glob
Expand Down
2 changes: 1 addition & 1 deletion bin/vsnp3_reference_options.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python

__version__ = "3.23"
__version__ = "3.24"

import os
import sys
Expand Down
2 changes: 1 addition & 1 deletion bin/vsnp3_remove_from_analysis.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python

__version__ = "3.23"
__version__ = "3.24"

import os
import sys
Expand Down
4 changes: 2 additions & 2 deletions bin/vsnp3_spoligotype.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python

__version__ = "3.23"
__version__ = "3.24"

import os
import gzip
Expand Down Expand Up @@ -163,7 +163,7 @@ def spoligo(self):
count_summary = pull.compute()
count_summary = OrderedDict(sorted(count_summary.items()))
spoligo_binary_dictionary = {}
self.call_cut_off = 4
self.call_cut_off = 2
for k, v in count_summary.items():
if v > self.call_cut_off:
spoligo_binary_dictionary.update({k: 1})
Expand Down
4 changes: 2 additions & 2 deletions bin/vsnp3_step1.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python

__version__ = "3.23"
__version__ = "3.24"

import os
import sys
Expand Down Expand Up @@ -132,7 +132,7 @@ def run(self,):
except AttributeError:
pass
self.MYCO = MYCO
if MYCO and self.spoligo:
if self.spoligo:
spoligo = Spoligo(SAMPLE_NAME=self.sample_name, FASTQ_R1=self.FASTQ_R1, FASTQ_R2=self.FASTQ_R2, debug=self.debug)
spoligo.spoligo()
spoligo.latex(self.latex_report.tex)
Expand Down
7 changes: 5 additions & 2 deletions bin/vsnp3_step2.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python

__version__ = "3.23"
__version__ = "3.24"

import os
import sys
Expand Down Expand Up @@ -218,7 +218,10 @@ def __init__(self, runtime=None, vcf_to_df=None, reference=None, groupings_dict=
print("<tr>", file=htmlfile)
print(f"<td>{key}</td>", end='\t', file=htmlfile)
for group in value:
print(f"<td>{group}</td>", end='\t', file=htmlfile)
if group == "Group Not Found":
print(f'<td><span style="color: red;">{group}</span></td>', end='\t', file=htmlfile)
else:
print(f"<td>{group}</td>", end='\t', file=htmlfile)
print("</tr>", file=htmlfile)
print("</table>", file=htmlfile)

Expand Down
2 changes: 1 addition & 1 deletion bin/vsnp3_table_compare.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python

__version__ = "3.23"
__version__ = "3.24"

import os
import re
Expand Down
2 changes: 1 addition & 1 deletion bin/vsnp3_vcf_annotation.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python

__version__ = "3.23"
__version__ = "3.24"

import os
import re
Expand Down
2 changes: 1 addition & 1 deletion bin/vsnp3_vcf_merge_to_fasta.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python

__version__ = "3.23"
__version__ = "3.24"

import os
import re
Expand Down
31 changes: 27 additions & 4 deletions bin/vsnp3_zero_coverage.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python

__version__ = "3.23"
__version__ = "3.24"

import os
import re
Expand All @@ -22,6 +22,28 @@ class Zero_Coverage(Setup):
'''
def __init__(self, FASTA=None, bam=None, vcf=None, debug=False):

def count_ac1_positions(vcf_file):
    '''
    Count the VCF data lines whose INFO column contains the entry AC=1.

    vcf_file: path to a plain-text, tab-delimited VCF file.

    Returns the number of non-header lines whose 8th tab-separated column
    (INFO) has "AC=1" as one of its semicolon-separated entries.
    Header lines (starting with '#'), blank lines, and lines with fewer
    than eight columns are ignored instead of raising IndexError.

    Raises OSError if the file cannot be opened.
    '''
    ac1_count = 0

    with open(vcf_file, 'r') as f:
        for line in f:
            # Skip header lines
            if line.startswith('#'):
                continue

            # Split the line into fields
            fields = line.strip().split('\t')

            # A blank or truncated line has no INFO column (index 7);
            # skip it rather than crash the whole run on one bad record.
            if len(fields) < 8:
                continue

            # Compare whole INFO entries so AC=10 or MLEAC=1 never match.
            if 'AC=1' in fields[7].split(';'):
                ac1_count += 1

    return ac1_count

self.ac1_count = count_ac1_positions(vcf)
print(f"Number of positions with AC=1: {self.ac1_count:,}")

Setup.__init__(self, FASTA=FASTA, debug=debug)
self.print_run_time('Zero Coverage')
self.sample_name = re.sub('[_.].*', '', bam)
Expand Down Expand Up @@ -116,11 +138,11 @@ def latex(self, tex):
print(r'\end{center}', file=tex)
print(r'\end{adjustbox}', file=tex)
print(r'\begin{adjustbox}{width=1\textwidth}', file=tex)
print(r'\begin{tabular}{ l | l | l | l | l | l | l }', file=tex)
print(f'BAM File & Reference Length & Genome with Coverage & Average Depth & No Coverage Bases & Quality SNPs \\\\', file=tex)
print(r'\begin{tabular}{ l | l | l | l | l | l | l | l }', file=tex)
print(f'BAM File & Reference Length & Genome with Coverage & Average Depth & No Coverage Bases & Ambiguous SNPs & Quality SNPs \\\\', file=tex)
print(r'\hline', file=tex)
bam = self.bam.replace('_', '\_')
print(f'{bam} & {self.reference_length:,} & {(self.genome_coverage*100):,.2f}\% & {self.ave_coverage:,.1f}X & {self.total_zero_coverage:,} & {self.good_snp_count:,} \\\\', file=tex)
print(f'{bam} & {self.reference_length:,} & {(self.genome_coverage*100):,.2f}\% & {self.ave_coverage:,.1f}X & {self.total_zero_coverage:,} & {self.ac1_count:,} & {self.good_snp_count:,} \\\\', file=tex)
print(r'\hline', file=tex)
print(r'\end{adjustbox}', file=tex)
print(r'\vspace{0.1 mm}', file=tex)
Expand All @@ -136,6 +158,7 @@ def excel(self, excel_dict):
excel_dict['Average Depth'] = f'{self.ave_coverage:,.1f}'
excel_dict['No Coverage Bases'] = f'{self.total_zero_coverage:,}'
excel_dict['Percent Ref with Zero Coverage'] = f'{self.percent_ref_with_zero_coverage:,.6f}%'
excel_dict['Ambiguous SNPs'] = f'{self.ac1_count:,}'
excel_dict['Quality SNPs'] = f'{self.good_snp_count:,}'


Expand Down

0 comments on commit 69745d0

Please sign in to comment.