diff --git a/build_sp b/build_sp
new file mode 100644
index 0000000..9ba31e5
--- /dev/null
+++ b/build_sp
@@ -0,0 +1,387 @@
+#!/bin/bash
+#############################################
+############################################
+# build species database
+############################################
+############################################
+
+cd $WDR
+GDR=$(readlink -f $GEN_DIR)
+echo "$package option activated"
+echo "$WDR is present directory" 
+echo "$GDR is the genomes directory"
+echo "$ODR is the output directory"
+
+cd $GDR
+exec 3>&2
+exec 2> /dev/null
+if [ "$LIST" == "false" ]; then
+ls {*.fna,*.fa,*.fasta} > $ODR/genomes.txt #suppress error to std output
+else
+cat $LIS > $ODR/genomes.txt
+fi
+exec 2>&3
+
+echo "####### PRE PROCESSING GENOMES #####
+"
+
+####################
+exec 3>&2
+exec 2> /dev/null
+for f in `cat $ODR/genomes.txt`
+do
+awk '{if (substr($0,1) ~ "plasmid" ) censor=1; else if (substr($0,1,1) == ">") censor=0; if (censor==0) print $0}' $f > $ODR/$f.noplasmid
+
+cd $ODR
+awk '{if (substr($0,1) ~ "phage" ) censor=1; else if (substr($0,1,1) == ">") censor=0; if (censor==0) print $0}' $f.noplasmid > $f.noplasmid1
+awk '{if (substr($0,1) ~ "extrachomosomal" ) censor=1; else if (substr($0,1,1) == ">") censor=0; if (censor==0) print $0}' $f.noplasmid1 > $f.noplasmid
+rename $f.noplasmid $f $f.noplasmid
+rm $f.noplasmid1
+num_of_contigs=$(grep -c ">" $f)
+echo -e "$f\t$num_of_contigs" >> num_of_contigs.txt
+cd $GDR
+done
+exec 2>&3
+###################
+
+cd $ODR
+
+if [[ ! -s num_of_contigs.txt ]]; then 
+echo "ERROR: Appropriate fasta files not found in Genomes directory"
+exit 1
+fi
+
+awk '$2 == 1' num_of_contigs.txt | cut -f1 > reference_genomes.txt
+awk 'FNR==NR { a[$NF]; next } !($NF in a)' reference_genomes.txt <(cut -f1 num_of_contigs.txt ) >  draft_genomes.txt 
+
+rm num_of_contigs.txt
+rm genomes.txt
+mkdir reordered_contigs
+#################################################
+
+if [ -s reference_genomes.txt ]; then 
+echo "Complete reference genome(s) detected" 
+else
+echo "ERROR: At least, a complete reference genome is required to reorder contigs of draft genomes"
+rm reference_genomes.txt draft_genomes.txt
+exit 1
+fi
+
+
+for f in `cat reference_genomes.txt`
+do
+cd $GDR
+grep -v ">" $f | tr -d '[:space:]' | fold -w 60 | sed "1 i\>$f" | sed -e '$a\ ' > $ODR/reordered_contigs/$f.fna
+cd $ODR
+done
+
+######### get position of dnaA ######
+cd reordered_contigs
+ls *.fna > reordered_ref_genomes
+cat $(cat reordered_ref_genomes) > $ODR/ref.fa
+rm reordered_ref_genomes
+cd $ODR
+
+blastn -query ref.fa -db $SMEG_DIR/dnaA_database/dnaa.fasta -evalue 0.05 -num_threads $NUM_THREAD -max_target_seqs 1 -out dnaa_id -word_size 11 -outfmt "6 qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore qlen" || exit 1
+
+
+cut -f1 dnaa_id | sort | uniq > temp.txt
+for i in `cat temp.txt`
+do
+grep -w "$i" dnaa_id | sort -nr -k12,12 | head -1 | cut -f1,7,13 >> temp.dnaA_output.txt
+done
+
+###### User defined Representative genome
+if [ "x$REPGEN" != "x" ]; then
+ grep -P '(^|\s)\K'$REPGEN'(?=\s|$)' temp.dnaA_output.txt | awk  '{printf "%s\t%4.3f\n", $1 , $2/$3 }'  > dnaA_output.txt 
+else
+awk  '{printf "%s\t%4.3f\n", $1 , $2/$3 }' temp.dnaA_output.txt | sort -n -k2,2 | head -1 > dnaA_output.txt
+fi
+
+rm temp* dnaa_id ref.fa
+
+
+if [[ ! -s dnaA_output.txt ]]; then
+repGenome=$(head -1 reference_genomes.txt)
+dnaA_pos=0.5
+else
+repGenome=$(cut -f1 dnaA_output.txt)
+dnaA_pos=$(cut -f2 dnaA_output.txt)
+fi
+
+rm dnaA_output.txt
+#########################
+
+no_of_comp_genomes=$(grep -c "." reference_genomes.txt)
+no_of_draft_genomes=$(grep -c "." draft_genomes.txt)
+
+echo "Number of complete genomes = $no_of_comp_genomes" >> log.txt
+echo "Number of draft genomes = $no_of_draft_genomes" >> log.txt
+echo "Selected representative genome is $repGenome" >> log.txt
+echo "dnaA position relative to ori is $dnaA_pos " >> log.txt
+echo " " >> log.txt
+ 
+if [ -s draft_genomes.txt ]; then
+############## Reorder contigs using Mauve ###########################
+echo "####### RE-ORDERING CONTIGS OF DRAFT GENOME(S) (MAUVE)#####
+"
+for i in `cat draft_genomes.txt`
+do
+mkdir $i.dir
+prefix=$(type -P Mauve |  rev | cut -d'/' -f3- | rev)
+mauvepath=$(find $prefix -name "Mauve.jar")
+echo "java -cp $mauvepath -Djava.awt.headless=true org.gel.mauve.contigs.ContigOrderer -output $ODR/$i.dir -ref $ODR/reordered_contigs/$repGenome.fna -draft $ODR/$i" >> parallel_commands
+done
+cat parallel_commands | parallel '{}'  >> Mauve.error.log.txt 2>&1
+
+rm parallel_commands
+
+for i in `cat draft_genomes.txt`
+do
+cd $i.dir
+aa=$(ls alignment*/$i.fas | sort | tail -1) || exit 1
+grep -v ">" $aa | tr -d '[:space:]' | fold -w 60 | sed "1 i\>$i" | sed -e '$a\ ' > $ODR/reordered_contigs/$i.fna || exit 1
+cd ../
+done
+rm Mauve.error.log.txt
+else
+echo "Only complete genomes identified"
+fi
+
+
+exec 3>&2
+exec 2> /dev/null
+rm reference_genomes.txt
+rm draft_genomes.txt
+rm -rf *.dir
+rm temp.txt
+rm {*.fna,*.fa,*.fasta}
+exec 2>&3
+
+########### Run prokka ################
+cd $ODR/reordered_contigs
+ls *.fna > ../genomes.txt
+cd $ODR
+mkdir gff
+for f in `cat genomes.txt`
+do
+aa=$( echo "$f" | rev | cut -d'.' -f2- | rev)
+cat $ODR/reordered_contigs/$f | sed 's/>.*/>temp/' > $ODR/$f
+echo "prokka --quiet --kingdom Bacteria --outdir $aa.dir --locustag $aa --prefix $aa $ODR/$f" >> parallel_commands
+done
+
+echo "####### ANNOTATING GENOMES (PROKKA) #####
+"
+
+
+cat parallel_commands | parallel '{}'  >> prokka.error.log.txt 2>&1
+for f in *.dir
+do
+cp $f/*.gff gff/. || exit 1
+rm -rf $f
+done
+
+exec 3>&2
+exec 2> /dev/null
+rm {*.fna,*.fa,*.fasta}
+exec 2>&3
+
+rm prokka.error.log.txt
+rm parallel_commands
+rm genomes.txt
+############ Run Roary ####################
+cd $ODR
+echo "####### CORE-GENOME ANALYSIS (ROARY) #####
+"
+roary -p $NUM_THREAD -f ./Roary -e -n -s $ODR/gff/*.gff || exit 1
+rm -rf gff/
+
+
+################ Generate phylogenetic tree #########
+
+FastTree -nt -quiet -gtr $ODR/Roary/core_gene_alignment.aln > tree.newick || exit 1
+
+
+######## If -e flag is chosen ######
+if [ "$REF_ONLY" == "true" ]; then
+Rscript $SMEG_DIR/create_clusters.R -i clusterOutput.txt -c 0
+cp $ODR/Roary/core_alignment_header.embl . 
+cp $ODR/Roary/core_gene_alignment.aln .
+if [ "$KEEP" == "false" ]; then
+rm -rf $ODR/Roary
+fi
+ exit 0
+fi
+
+
+############ Now build SMEG database ########
+cp reordered_contigs/$repGenome.fna .
+ 
+echo "####### BUILDING SMEG DATABASE(S) #####
+"
+
+cat <<EOT >> parallel.sh
+#!/bin/bash
+if [ "$IGNORE_ITER" == "true" ]; then
+count=2
+else
+count=0
+fi
+
+touch breakCluster.txt
+until [ ! \$count -lt 3 ]
+do
+Rscript $SMEG_DIR/create_clusters.R -i clusterOutput.txt -c 0
+
+if ! [[ -s clusterOutput.txt ]]; then echo "ERROR: clusterOutput.txt empty. Unable to generate clusters"; exit 1 ; fi
+$SMEG_DIR/uniqueClusterSNP $ODR/Roary/core_gene_alignment.aln clusterOutput.txt $NUM_THREAD $SAT ClusterSNPs.txt
+cut -f3 ClusterSNPs.txt | grep -v "cluster" | sort | uniq -c | sed -r 's/^ *([0-9]+)/\1\t/' | awk '\$2 != 0 && \$1 < '$CNT'' | cut -f2 > breakCluster.txt || exit 1
+num_of_cluS_rerun=\$(grep -c "." breakCluster.txt)
+count=\`expr \$count + 1\`
+echo "iteration count \$count"
+
+if [ \$num_of_cluS_rerun -lt 1 ]
+then
+count=\`expr \$count + 100000\`
+fi
+done
+
+rm breakCluster.txt
+awk '\$3 != 0' ClusterSNPs.txt > ClusterSNPs_final.txt
+rm ClusterSNPs.txt
+
+samtools faidx $ODR/Roary/core_gene_alignment.aln $repGenome > $repGenome.aln
+Rscript $SMEG_DIR/getPositionWithoutGaps.R -i ClusterSNPs_final.txt -x $repGenome.aln -m 0
+
+grep "label=" $ODR/Roary/core_alignment_header.embl | cut -f2 -d'=' > core_genes.txt
+grep "feature" $ODR/Roary/core_alignment_header.embl | rev | cut -d' ' -f1 | rev | sed 's/\../ /g' | awk '{print (\$1 - 1) "\t" \$2}' | sed "s/^/$repGenome	/" > core_gene_coordinate_in_align
+
+paste -d'\t' core_gene_coordinate_in_align core_genes.txt > bedfile
+rm core_gene_coordinate_in_align core_genes.txt
+
+samtools faidx $repGenome.aln
+bedtools getfasta -fi $repGenome.aln -bed bedfile -name | sed 's/-//g'  | fold -w 60  >> core_genes2.fa
+awk 'BEGIN {RS = ">" ; FS = "\n" ; ORS = ""} \$2 {print ">"\$0}' core_genes2.fa > core_genes.fa
+rm core_genes2.fa
+rm bedfile
+
+awk '/^>/ {if (seqlen){print seqlen}; print ;seqlen=0;next; } { seqlen += length(\$0)}END{print seqlen}' core_genes.fa | grep -v ">" > geneLengths
+grep ">" core_genes.fa | sed 's/>//g' > geneNames
+paste <(cat geneLengths | awk '{total += \$0; \$0 = total - \$0}1') <(cat geneLengths | awk '{total += \$0; \$0 = total}1') <(cat geneNames) --delimiters '\t' > geneCoordinates.txt
+rm geneLengths geneNames
+########
+makeblastdb -in $repGenome.fna -parse_seqids -dbtype nucl
+blastn -query core_genes.fa -db $repGenome.fna -evalue 5 -num_threads 1 -max_target_seqs 1 -outfmt 6 -out $repGenome.blast.txt -word_size 11
+
+cut -f1 $repGenome.blast.txt | sort | uniq > tmp.txt
+
+for i in \`cat tmp.txt\`
+do
+grep -w "\$i" $repGenome.blast.txt | sort -nr -k12,12 | head -1 | cut -f2,9,10 > temp1
+grep -w "\$i" $repGenome.blast.txt | sort -nr -k12,12 | head -1 | cut -f1 > temp2
+paste -d'\t' temp1 temp2 >> $repGenome.core.geneCood.txt
+done
+
+rm temp* tmp.txt
+rm $repGenome.blast.txt core_genes.fa
+
+########################
+sed '1d' modified_uniq_cluster_SNPs.txt | cut -f1 | sort | uniq > clusters.txt || exit 1
+###########
+Rscript $SMEG_DIR/getPositioninRef.R -i modified_uniq_cluster_SNPs.txt -x $repGenome.core.geneCood.txt -y geneCoordinates.txt
+##########
+
+for strains in \`cat clusters.txt\`
+do
+grep -P '(^|\s)\K'\$strains'(?=\s|$)' newcoordinates.txt  > \$strains.Input.txt || exit 1
+done
+
+rm newcoordinates.txt modified_uniq_cluster_SNPs.txt ClusterSNPs_final.txt tree.newick
+rm $repGenome.aln $repGenome.aln.fai
+rm $repGenome.fna.n*
+
+mkdir Index
+bowtie2-build $repGenome.fna Index/$repGenome.fna -q || exit 1
+samtools faidx $repGenome.fna
+
+cp $ODR/Roary/core_alignment_header.embl .
+cp $ODR/Roary/core_gene_alignment.aln .
+
+awk -F'\t' 'NR==FNR{c[\$1]++;next};c[\$2] > 0' <(grep -w -v -f <(cat clusters.txt | rev | cut -d'.' -f3 | rev | cut -c8-) <(cut -f2 clusterOutput.txt)) clusterOutput.txt > clusters_with_no_unique_SNP.txt
+printf '1\ni\nStrain\tclusterID\n.\nw\n' | ed -s clusters_with_no_unique_SNP.txt
+EOT
+
+if [ "$AUTO" == "false" ]; then
+echo "bash parallel.sh" >> parallel_commands
+else
+thresholds=$(echo "0.4 0.5 0.6 0.7 0.8 0.9")
+for z in `echo $thresholds` 
+do 
+sed "s/clusterOutput.txt $NUM_THREAD $SAT/clusterOutput.txt $NUM_THREAD $z/g" parallel.sh | sed "s/\"$IGNORE_ITER\" ==/\"false\" ==/g" | sed "2i mkdir F.$z" | sed "3i cd F.$z" | sed "4i cp $ODR/tree.newick ." | sed "5i cp $ODR/$repGenome.fna ." > parallel.$z.F.sh
+sed "s/clusterOutput.txt $NUM_THREAD $SAT/clusterOutput.txt $NUM_THREAD $z/g" parallel.sh | sed "s/\"$IGNORE_ITER\" ==/\"true\" ==/g" | sed "2i mkdir T.$z" | sed "3i cd T.$z" | sed "4i cp $ODR/tree.newick ." | sed "5i cp $ODR/$repGenome.fna ." > parallel.$z.T.sh
+echo "bash parallel.$z.F.sh" >> parallel_commands
+echo "bash parallel.$z.T.sh" >> parallel_commands
+done
+fi
+
+cat parallel_commands | parallel '{}' | grep -v "sites screened" >> smeg.error.log.txt 2>&1 || exit 1
+rm parallel_commands parallel.*
+
+if [ "$AUTO" == "false" ]; then
+unclustered_strains_count=$(sed '1d' clusters_with_no_unique_SNP.txt | wc -l)
+clusters_wo_uniq=$(sed '1d' clusters_with_no_unique_SNP.txt | cut -f2 | sort | uniq | wc -l)
+total_cluster_count=$(grep -c "." clusters.txt)
+
+echo "Total number of clusters = $total_cluster_count" >> log.txt
+echo "Could not generate unique SNPs for $clusters_wo_uniq clusters containing a total of $unclustered_strains_count strains" >> log.txt 
+echo "See clusters_with_no_unique_SNP.txt for more details" >> log.txt 
+echo " "
+else
+iterative_yes=$(echo "F.0.4 F.0.5 F.0.6 F.0.7 F.0.8 F.0.9")
+iterative_no=$(echo "T.0.4 T.0.5 T.0.6 T.0.7 T.0.8 T.0.9")
+for f in `echo $iterative_yes`
+do
+unclustered_strains_count=$(sed '1d' $f/clusters_with_no_unique_SNP.txt | wc -l)
+clusters_wo_uniq=$(sed '1d' $f/clusters_with_no_unique_SNP.txt | cut -f2 | sort | uniq | wc -l)
+total_cluster_count=$(grep -c "." $f/clusters.txt)
+assign_thres=$(cut -f2- -d'.' <<< $f)
+median_cluster_SNPs=$(wc -l $f/*.Input.txt | grep -v "total" | sed -r 's/^ *([0-9]+)/\1\t/' | cut -f1 | sort -n | awk '{arr[NR]=$1}END { if (NR%2==1) print arr[(NR+1)/2]; else print (arr[NR/2]+arr[NR/2+1])/2}')
+head -4 log.txt > $f/misc.txt
+
+echo "### SNP assignment threshold $assign_thres with iterative clustering output ######" >> log.txt
+echo "Total number of clusters = $total_cluster_count" >> log.txt
+echo "Median unique SNPs in clusters = $median_cluster_SNPs" >> log.txt
+echo "Could not generate unique SNPs for $clusters_wo_uniq clusters containing a total of $unclustered_strains_count strains" >> log.txt
+echo "See $ODR/$f/clusters_with_no_unique_SNP.txt for more details" >> log.txt
+echo "Database created with above parameters located in $ODR/$f " >> log.txt
+echo "################################################################################## 
+
+" >> log.txt
+done
+
+for f in `echo $iterative_no`
+do
+unclustered_strains_count=$(sed '1d' $f/clusters_with_no_unique_SNP.txt | wc -l)
+clusters_wo_uniq=$(sed '1d' $f/clusters_with_no_unique_SNP.txt | cut -f2 | sort | uniq | wc -l)
+total_cluster_count=$(grep -c "." $f/clusters.txt)
+assign_thres=$(cut -f2- -d'.' <<< $f)
+median_cluster_SNPs=$(wc -l $f/*.Input.txt | grep -v "total" | sed -r 's/^ *([0-9]+)/\1\t/' | cut -f1 | sort -n | awk '{arr[NR]=$1}END { if (NR%2==1) print arr[(NR+1)/2]; else print (arr[NR/2]+arr[NR/2+1])/2}')
+head -4 log.txt > $f/misc.txt
+
+echo "### SNP assignment threshold $assign_thres without iterative clustering output ######" >> log.txt
+echo "Total number of clusters = $total_cluster_count" >> log.txt
+echo "Median unique SNPs in clusters = $median_cluster_SNPs" >> log.txt
+echo "Could not generate unique SNPs for $clusters_wo_uniq clusters containing a total of $unclustered_strains_count strains" >> log.txt
+echo "See $f/clusters_with_no_unique_SNP.txt for more details" >> log.txt
+echo "Database created with above parameters located in $ODR/$f " >> log.txt
+echo "################################################################################## 
+
+" >> log.txt
+done
+fi
+if [ "$KEEP" == "false" ]; then
+rm -rf $ODR/Roary
+fi
+
+rm smeg.error.log.txt
+echo "####### DONE #####"
diff --git a/growth_est_ref b/growth_est_ref
new file mode 100644
index 0000000..6febdf7
--- /dev/null
+++ b/growth_est_ref
@@ -0,0 +1,247 @@
+#!/bin/bash
+#############################################
+############################################
+# growth estimation reference-based
+############################################
+############################################
+
+cd $WDR
+RDR=$(readlink -f $READS_DIR)
+if  [ $DESMAN == "false" ]; then SPEC=$(readlink -f $SPECIES_DIR) ; fi
+echo "$package option activated"
+echo "$WDR is present directory"
+echo "$RDR is the reads directory"
+echo "$ODR is the output directory"
+echo "Reference-based method activated"
+
+################
+
+cd $ODR
+########################
+# USE REFERENCE GENOME TO GET POSITION OF CORE GENES
+###########################
+if [ $DESMAN == "false" ]; then
+if ! [[ -s $GEN_LIS ]]; then echo "ERROR: Input file is empty"; exit 1 ; fi
+samtools faidx $SPEC/core_gene_alignment.aln $(cat $GEN_LIS) > strains.aln || exit 1
+$SMEG_DIR/uniqueSNPmultithreading strains.aln 2 2 || exit 1
+
+if ! [[ -s uniqueSNPs ]]; then echo "ERROR: The generated uniqueSNPs file is empty. Check your input file or GCC version"; exit 1 ; fi 
+
+###############################
+###############################
+for ref in `cat $GEN_LIS`
+do
+reference=$(echo "$ref" | rev | cut -d'/' -f1 | rev) || exit 1
+
+if [[ -e "$SPEC/misc.txt" ]]; then
+cp $SPEC/../reordered_contigs/$reference.fna .
+else
+cp $SPEC/reordered_contigs/$reference.fna . || exit 1
+fi
+
+samtools faidx $SPEC/core_gene_alignment.aln $reference > $reference.aln || exit 1
+Rscript $SMEG_DIR/getPositionWithoutGaps.R -i uniqueSNPs -x $reference.aln -m 1 || exit 1
+
+grep "label=" $SPEC/core_alignment_header.embl | cut -f2 -d'=' > core_genes.txt || exit 1
+grep "feature" $SPEC/core_alignment_header.embl | rev | cut -d' ' -f1 | rev | sed 's/\../	/g' | awk '{print ($1 - 1) "\t" $2}' | sed "s/^/$reference	/" > core_gene_coordinate_in_align || exit 1
+
+paste -d'\t' core_gene_coordinate_in_align core_genes.txt > bedfile
+rm core_gene_coordinate_in_align core_genes.txt
+
+bedtools getfasta -fi $reference.aln -bed bedfile -name | sed 's/-//g'  | fold -w 60  >> core_genes2.fa || exit 1
+awk 'BEGIN {RS = ">" ; FS = "\n" ; ORS = ""} $2 {print ">"$0}' core_genes2.fa > core_genes.fa || exit 1
+rm core_genes2.fa
+rm bedfile
+
+awk '/^>/ {if (seqlen){print seqlen}; print ;seqlen=0;next; } { seqlen += length($0)}END{print seqlen}' core_genes.fa | grep -v ">" > geneLengths
+grep ">" core_genes.fa | sed 's/>//g' > geneNames
+paste <(cat geneLengths | awk '{total += $0; $0 = total - $0}1') <(cat geneLengths | awk '{total += $0; $0 = total}1') <(cat geneNames) --delimiters '\t' > geneCoordinates.txt
+rm geneLengths geneNames
+########
+makeblastdb -in $reference.fna -parse_seqids -dbtype nucl || exit 1
+blastn -query core_genes.fa -db $reference.fna -evalue 5 -num_threads $NUM_THREAD -max_target_seqs 1 -outfmt 6 -out $reference.blast.txt -word_size 11 || exit 1
+
+cut -f1 $reference.blast.txt | sort | uniq > tmp.txt
+
+for i in `cat tmp.txt`
+do
+grep -w "$i" $reference.blast.txt | sort -nr -k12,12 | head -1 | cut -f2,9,10 > temp1 || exit 1
+grep -w "$i" $reference.blast.txt | sort -nr -k12,12 | head -1 | cut -f1 > temp2 || exit 1
+paste -d'\t' temp1 temp2 >> $reference.core.geneCood.txt
+done
+
+rm temp* tmp.txt
+rm $reference.blast.txt core_genes.fa
+
+###########
+# EXTRACT UNIQUE SNPS FROM INPUT STRAINS
+##########
+
+Rscript $SMEG_DIR/getPositioninRef.R -i modified_uniq_cluster_SNPs.txt -x $reference.core.geneCood.txt -y geneCoordinates.txt
+###########
+
+grep -P '(^|\s)\K'$reference'(?=\s|$)' newcoordinates.txt | sort | uniq > $reference.Input.txt || exit 1
+
+rm newcoordinates.txt
+rm $reference.fna.nsq
+rm $reference.fna.nsi
+rm $reference.fna.nsd
+rm $reference.fna.nog
+rm $reference.fna.nin
+rm $reference.fna.nhr
+rm $reference.core.geneCood.txt geneCoordinates.txt
+bowtie2-build $reference.fna $reference.fna -q || exit 1
+echo "$reference" >> strain_list.txt
+done
+rm strains.aln uniqueSNPs modified_uniq_cluster_SNPs.txt
+################
+#####
+# DESMAN strains
+else
+################
+################
+if ! [[ -s $DESM ]]; then echo "ERROR: Input file is empty"; exit 1 ; fi
+for f in `cat $DESM`
+do
+cp $f .
+reference=$(echo "$f" | rev | cut -d'/' -f1 | rev) || exit 1
+grep -v ">" $reference | tr -d '[:space:]' | fold -w 60 | sed "1 i\>$reference" | sed -e '$a\ ' >> allStrains.fna
+done
+
+samtools faidx allStrains.fna || exit 1
+$SMEG_DIR/uniqueSNPmultithreading allStrains.fna 2 2 || exit 1
+if ! [[ -s uniqueSNPs ]]; then echo "ERROR: The generated uniqueSNPs file is empty. Check your input file or GCC version"; exit 1 ; fi
+sed '1d' uniqueSNPs | cut -f1 | sort | uniq > passed_strains.txt
+
+for strains in `cat passed_strains.txt`
+do
+grep -P '(^|\s)\K'$strains'(?=\s|$)' uniqueSNPs  > $strains.Input.txt || exit 1
+echo "$strains" >> strain_list.txt
+cat $strains | grep -v ">" | tr -d '[:space:]' | fold -w 60 | sed "1 i\>$strains" | sed -e '$a\ ' > $strains.fna
+bowtie2-build $strains.fna $strains.fna -q || exit 1
+rm $strains
+done
+
+##############
+
+rm uniqueSNPs allStrains.fna.fai
+rm passed_strains.txt
+
+fi
+#######################################
+#######################################
+####  Estimate growth rate
+####
+#######################################
+#######################################
+##########
+cd $RDR
+if [ "$LIST" == "false" ]; then
+ls *.$READS_EXT | sed "s/\.$READS_EXT$//" > $ODR/samples.txt || exit 1
+else
+cat $LIS | sed "s/\.$READS_EXT$//" > $ODR/samples.txt || exit 1
+fi
+
+cd $ODR
+for i in `cat samples.txt`
+do
+mkdir $i
+cd $i
+
+for reference in `cat $ODR/strain_list.txt`
+do
+bowtie2 -x $ODR/$reference.fna -U $RDR/$i.$READS_EXT -S $i.sam -p $NUM_THREAD --quiet || exit 1
+
+samtools view -@ $NUM_THREAD -bS $i.sam > $i.bam || exit 1
+rm $i.sam
+
+if [[ $MISMATCH == 9999 ]]; then
+samtools view -@ $NUM_THREAD -b -F 4 $i.bam | samtools sort -@ $NUM_THREAD -o $i.sorted.bam || exit 1
+rm $i.bam
+else
+bamtools filter -tag "XM:<=$MISMATCH" -in $i.bam | bamtools filter -tag "XO:0" | bamtools filter -tag "XG:0" -out $i.2.bam || exit 1
+rm $i.bam
+samtools view -@ $NUM_THREAD -b -F 4 $i.2.bam | samtools sort -@ $NUM_THREAD -o $i.sorted.bam || exit 1
+rm $i.2.bam
+fi
+
+samtools faidx $ODR/$reference.fna || exit 1
+samtools mpileup -a -a -A -B -f $ODR/$reference.fna $i.sorted.bam > $i.pileup || exit 1
+$SMEG_DIR/pileupParser $i.pileup
+
+rm $i.pileup
+rm $i.sorted.bam
+###################
+sed '1d' polymorphic.site.coverage > polymorphic.site.coverage2
+rm polymorphic.site.coverage
+
+Rscript $SMEG_DIR/SMEG_SNP.R -i $ODR/$reference.Input.txt -p  polymorphic.site.coverage2 -o $reference.coord.txt
+
+awk '$8 ~ "a" || $8 ~ "A" {print $2"\011"$1"\011"$3}' $reference.coord.txt >> $reference.final.temp || exit 1
+awk '$8 ~ "t" || $8 ~ "T" {print $2"\011"$1"\011"$4}' $reference.coord.txt >> $reference.final.temp || exit 1
+awk '$8 ~ "g" || $8 ~ "G" {print $2"\011"$1"\011"$5}' $reference.coord.txt >> $reference.final.temp || exit 1
+awk '$8 ~ "c" || $8 ~ "C" {print $2"\011"$1"\011"$6}' $reference.coord.txt >> $reference.final.temp || exit 1
+
+sort -n -k2,2 $reference.final.temp > $i.$reference.cov.txt
+
+rm $reference.final.temp
+rm $reference.coord.txt
+rm polymorphic.site.coverage2
+
+done
+Rscript $SMEG_DIR/SNP_method_ref.R -i $i.temp1.txt -c $COV_CUTOFF -d 0.5 -s $MIN_SNP 
+awk -F'\t' '{ $2 = ($2 > 10 ? 1 : $2) } 1' OFS='\t' $i.temp1.txt | awk '$3 > '$COV_CUTOFF'' | sed "s/$i.//g" > $i.SMEG.txt 
+rm $i.temp1.txt
+printf '1\ni\nStrain\tSMEG\tCoverage\tNo of SNPs\tSMEG range\n.\nw\n' | ed -s $i.SMEG.txt
+
+rm $i.*.cov.txt
+cd $ODR
+done
+cd $ODR
+exec 3>&2
+exec 2> /dev/null
+rm *.txt
+rm *.bt2
+rm *.aln
+rm *.fai
+rm *.fna
+exec 2>&3
+
+cp */*.SMEG.txt $ODR/. || exit 1
+rm -rf */
+#################
+# MERGE OPTION
+################
+cd $ODR
+if [ "$MERGE" == "true" ]; then
+cat *.SMEG.txt | grep -v "SMEG" | cut -f1 | sort | uniq > SMEG.genomes
+ls *.txt | sed 's/\.txt//g' > SMEG_list.txt
+
+for i in `cat SMEG_list.txt`
+do
+for f in `cat SMEG.genomes`
+do
+paste <(awk '$1 == "'$f'" {print $1}' SMEG.genomes ) <(awk '$1 == "'$f'" {print $2}' $i.txt )  >> $i.temp.merge.txt
+done
+printf '1\ni\nStrain\t'$i'\n.\nw\n' | ed -s $i.temp.merge.txt
+done
+
+for f in *.temp.merge.txt
+do
+cut -f2 $f > $f.tmp
+rm $f
+done
+
+printf '1\ni\nStrain\n.\nw\n' | ed -s SMEG.genomes
+paste -d'\t' SMEG.genomes *.temp.merge.txt.tmp > merged_table.txt
+rm *.temp.merge.txt.tmp
+rm SMEG_list.txt
+rm SMEG.genomes
+exec 3>&2
+exec 2> /dev/null
+Rscript $SMEG_DIR/heatmap.R
+exec 2>&3
+echo "run complete"
+else
+echo "run complete"
+fi
diff --git a/smeg b/smeg
new file mode 100644
index 0000000..d8d041a
--- /dev/null
+++ b/smeg
@@ -0,0 +1,505 @@
+#!/bin/bash
+
+package=""  # Default to empty package
+SMEG_DIR="$( cd "$( dirname "$(readlink -f ${BASH_SOURCE[0]})" )" && pwd )"
+WDR=$(pwd)
+
+while getopts ":hv" opt; do
+  case ${opt} in
+    h )
+      echo "Usage:"
+      echo "    smeg build_species <options>   Build species database"
+      echo "    smeg growth_est <options>      Estimate strain-specific growth rate"
+      echo "    smeg -v                        Version"
+      echo "    smeg -h                        Display this help message"
+      exit 0
+      ;;
+    v )
+      echo "smeg v1.1" 
+      exit 0
+      ;;
+   \? )
+     echo "Invalid Option: -$OPTARG" 1>&2
+     exit 1
+     ;;
+  esac
+done
+###########################################################################
+if [ $# -eq 0 ];
+then
+      echo "Usage:"
+      echo "    smeg build_species <options>   Build species database"
+      echo "    smeg growth_est <options>      Estimate strain-specific growth rate"
+      echo "    smeg -v                        Version"
+      echo "    smeg -h                        Display this help message"
+    exit 1
+fi
+##########################################################################
+
+shift $((OPTIND -1))
+
+subcommand=$1;
+case "$subcommand" in
+  # Parse options to the first sub command
+  build_species)
+    package=$1; shift  # Remove 'build_species' from the argument list
+    LIST=false
+    NUM_THREAD=4
+    SAT=0.6
+    CNT=50
+    IGNORE_ITER=false
+    AUTO=false
+    KEEP=false
+    REF_ONLY=false
+    INT='^[0-9]+$'
+    FLOAT='^[0-9]+([.][0-9]+)?$'
+    # Process package options
+    while getopts ":g:o:l:p:r:s:t:hiake" opt; do
+      case ${opt} in
+        g )
+          GEN_DIR=$OPTARG
+          ;;
+        o )
+          OUTPUT_DIR=$OPTARG
+          ;;
+        l )
+          LIST=$OPTARG
+          ;;
+	p )
+          NUM_THREAD=$OPTARG
+          ;;
+        s )
+          SAT=$OPTARG
+          ;;
+        t )
+          CNT=$OPTARG
+          ;;
+        i )
+          IGNORE_ITER=true
+          ;;
+        a )
+          AUTO=true
+          ;;
+        r )
+          REPGEN=$OPTARG
+          ;;
+        k )
+          KEEP=true
+          ;;
+        e )
+          REF_ONLY=true
+          ;;
+	h )
+      echo "Usage:"
+      echo "    smeg build_species <options>"
+      echo "    <options>"
+      echo "    -g        Genomes directory"
+      echo "    -o        Output directory"
+      echo "    -l        File listing a subset of genomes for database building"
+      echo "                [default = use all genomes in 'Genomes directory']"
+      echo "    -p INT    Number of threads [default 4]"
+      echo "    -s FLOAT  SNP assignment threshold (range 0.1 - 1) [default 0.6]"
+      echo "    -t INT    Cluster SNPs threshold for iterative clustering [default 50]"
+      echo "    -i        Ignore iterative clustering"
+      echo "    -a        Activate auto-mode"
+      echo "    -r        Representative genome [default = auto select Rep genome]"
+      echo "    -k        Keep Roary output [default = false]"
+      echo "    -e        Create database ONLY applicable with Reference-based SMEG method" 
+      echo "                [default = generate database suitable for both de novo and ref-based methods]"
+      echo "    -h        Display this message"
+      exit 0
+      ;;
+        \? )
+          echo "Invalid Option: -$OPTARG" 1>&2
+          exit 1
+          ;;
+        : )
+          echo "Invalid Option: -$OPTARG requires an argument" 1>&2
+          exit 1
+          ;;
+       *  ) 
+          echo "Unimplemented option: -$OPTARG" >&2
+          exit 1
+          ;;
+esac
+done
+
+if [ $# -eq 0 ];
+then
+      echo "Usage:"
+      echo "    smeg build_species <options>"
+      echo "    <options>"
+      echo "    -g        Genomes directory"
+      echo "    -o        Output directory"
+      echo "    -l        File listing a subset of genomes for database building"
+      echo "                [default = use all genomes in 'Genomes directory']"
+      echo "    -p INT    Number of threads [default 4]"
+      echo "    -s FLOAT  SNP assignment threshold (range 0.1 - 1) [default 0.6]"
+      echo "    -t INT    Cluster SNPs threshold for iterative clustering [default 50]"
+      echo "    -i        Ignore iterative clustering"
+      echo "    -a        Activate auto-mode"
+      echo "    -r        Representative genome [default = auto select Rep genome]"
+      echo "    -k        Keep Roary output [default = false]"
+      echo "    -e        Create database ONLY applicable with Reference-based SMEG method" 
+      echo "                [default = generate database suitable for both de novo and ref-based methods]"
+      echo "    -h        Display this message"
+      exit 1
+fi
+
+if [ "x" == "x$GEN_DIR" ]; then
+  echo "-g [option] is required"
+  exit 1
+fi
+if ! [[ $NUM_THREAD =~ $INT ]] && [[ $NUM_THREAD != 1 ]] ; then
+    echo "-p [option] requires an integer"
+    exit 1
+fi
+if [ "$LIST" != "false" ]; then
+LIS=$(readlink -f $LIST)
+fi
+if ! [[ $SAT =~ $FLOAT ]] || [[ $SAT > 1 ]] || [[ $SAT < 0.1 ]]; then
+    echo "-s [option] requires a numeric value between 0.1 and 1"
+    exit 1
+fi
+if ! [[ $CNT =~ $INT ]] && [[ $CNT != 50 ]]; then
+    echo "-t [option] requires an integer"
+    exit 1
+fi
+if [[ $CNT == 0 ]]; then
+    echo "-t [option] requires a non-zero value"
+    exit 1
+fi
+if  [  "$AUTO" == "true" ] ; then
+    REF_ONLY=false
+fi
+if [ "x" == "x$OUTPUT_DIR" ]; then
+ directory=$(echo "species_database_$(date +%s)")
+ mkdir $WDR/$directory
+ OUTPUT_DIR=$WDR/$directory
+fi
+
+shift $((OPTIND -1))
+    ;;
+###############
+ growth_est)
+    package=$1; shift  # Remove 'growth_est' from the argument list
+   METHOD=0
+   LIST=false
+   GEN_LIST=false
+   DESMAN=false
+   COV_CUTOFF=0.5
+   READS_EXT="fastq"
+   NUM_THREAD=4
+   CLUS_DET=0.2
+   SAT=0.6
+   INT='^[0-9]+$'
+   FLOAT='^[0-9]+([.][0-9]+)?$'
+   MERGE=false
+   MIN_SNP=100
+   MISMATCH=9999
+    # Process package options
+    while getopts ":r:o:s:u:d:n:l:m:x:t:c:a:g:p:eh" opt; do
+      case ${opt} in
+        r )
+          READS_DIR=$OPTARG
+          ;;
+        x )
+          READS_EXT=$OPTARG
+          ;;
+        o )
+          OUTPUT_DIR=$OPTARG
+          ;;
+        s )
+          SPECIES_DIR=$OPTARG
+          ;;
+	d )
+          CLUS_DET=$OPTARG
+          ;;
+        c )
+          COV_CUTOFF=$OPTARG
+          ;;
+	p )
+          NUM_THREAD=$OPTARG
+          ;;
+        t )
+          SAT=$OPTARG
+          ;;
+        m )
+          METHOD=$OPTARG
+          ;;
+        e )
+          MERGE=true
+          ;;
+        l )
+          LIST=$OPTARG
+          ;;
+        g )
+          GEN_LIST=$OPTARG
+          ;;
+        n )
+          MISMATCH=$OPTARG
+          ;;
+        u )
+          MIN_SNP=$OPTARG
+          ;;
+        a )
+          DESMAN=$OPTARG
+          ;;
+        h )
+      echo "Usage:"
+      echo "    smeg growth_est <options>"
+      echo "    <options>"
+      echo ""
+      echo "  ## MAIN OPTIONS ## "
+      echo "    -r         Reads directory (single-end reads)"
+      echo "    -x         Sample filename extension (fq, fastq, fastq.gz) [default fastq]"
+      echo "    -o         Output directory"
+      echo "    -s         Species database directory"
+      echo "    -m  INT    SMEG method (0 = de novo-based method, 1 = reference-based method) [default = 0]"
+      echo "    -c  FLOAT  Coverage cutoff (>= 0.5) [default 0.5]"
+      echo "    -u  INT    Minimum number of SNPs to estimate growth rate [default = 100]"
+      echo "    -l         Path to file listing a subset of reads for analysis"
+      echo "               [default = analyze all samples in Reads directory]"
+      echo ""
+      echo "  ## DE-NOVO BASED APPROACH OPTIONS ## "
+      echo "    -d  FLOAT  Cluster detection threshold (range 0.1 - 1) [default = 0.2]"
+      echo "    -t  FLOAT  Sample-specific SNP assignment threshold (range 0.1 - 1) [default = 0.6]
+"
+      echo "  ## REFERENCE BASED APPROACH OPTIONS ##" 
+      echo "    -g         File listing reference genomes for growth rate estimation"
+      echo "    -a         File listing FULL PATH to DESMAN-resolved strains in fasta format (core-genes)"
+      echo "    -n  INT    Max number of mismatch [default = use default bowtie2 threshold]
+"
+      echo "  ## OTHER OPTIONS ##"
+      echo "    -e         merge output tables into a single matrix file and generate heatmap"
+      echo "    -p  INT    Number of threads [default 4]"
+      echo "    -h         Display this message"
+      exit 0
+      ;;
+        \? )
+          echo "Invalid Option: -$OPTARG" 1>&2
+          exit 1
+          ;;
+        : )
+          echo "Invalid Option: -$OPTARG requires an argument" 1>&2
+          exit 1
+          ;;
+      esac
+    done
+
+if [ $# -eq 0 ];
+then
+  echo "Usage:"
+      echo "    smeg growth_est <options>"
+      echo "    <options>"
+      echo ""
+      echo "  ## MAIN OPTIONS ## "
+      echo "    -r         Reads directory (single-end reads)"
+      echo "    -x         Sample filename extension (fq, fastq, fastq.gz) [default fastq]"
+      echo "    -o         Output directory"
+      echo "    -s         Species database directory"
+      echo "    -m  INT    SMEG method (0 = de novo-based method, 1 = reference-based method) [default = 0]"
+      echo "    -c  FLOAT  Coverage cutoff (>= 0.5) [default 0.5]"
+      echo "    -u  INT    Minimum number of SNPs to estimate growth rate [default = 100]"
+      echo "    -l         Path to file listing a subset of reads for analysis"
+      echo "               [default = analyze all samples in Reads directory]"
+      echo ""
+      echo "  ## DE-NOVO BASED APPROACH OPTIONS ## "
+      echo "    -d  FLOAT  Cluster detection threshold (range 0.1 - 1) [default = 0.2]"
+      echo "    -t  FLOAT  Sample-specific SNP assignment threshold (range 0.1 - 1) [default = 0.6]
+"
+      echo "  ## REFERENCE BASED APPROACH OPTIONS ##"
+      echo "    -g         File listing reference genomes for growth rate estimation"
+      echo "    -a         FIle listing FULL PATH to DESMAN-resolved strains in fasta format (core-genes)"
+      echo "    -n  INT    Max number of mismatch [default = use default bowtie2 threshold]
+"
+      echo "  ## OTHER OPTIONS ##"
+      echo "    -e         merge output tables into a single matrix file and generate heatmap"
+      echo "    -p  INT    Number of threads [default 4]"
+      echo "    -h         Display this message"
+      exit 1
+fi
+
+if [ "x" == "x$READS_DIR" ]; then
+  echo "-r [option] is required"
+  exit 1
+fi
+if [ "x" == "x$SPECIES_DIR" ] && [ "$DESMAN" == "false" ]; then
+  echo "-s [option] is required"
+  exit 1
+fi
+if [ "$READS_EXT" != "fq" ] && [ "$READS_EXT" != "fastq" ] && [ "$READS_EXT" != "fastq.gz" ] ; then
+  echo "Invalid filename extension. Recognized extensions are fq, fastq, fastq.gz"
+  exit 1
+fi
+if ! [[ $COV_CUTOFF =~ $FLOAT ]] && [[ $COV_CUTOFF != 1 ]]; then
+    echo "-c [option] requires a numeric value"
+    exit 1
+fi
+if (( $(bc <<< "$COV_CUTOFF < 0.5") )); then
+    echo "required minimum coverage cutoff is 0.5"
+    exit 1
+fi
+if  [[ $METHOD != 1 ]] && [[ $METHOD != 0 ]]; then
+    echo "-m [option] requires an integer value of 0 or 1"
+    echo "de novo-based method = 0, reference-based method = 1"
+    exit 1
+fi
+
+if [ "$GEN_LIST" != "false" ] && [ "$DESMAN" != "false" ]; then
+echo "-g [option] and -a [option] cannot be specified simultaneously"
+exit 1
+fi
+if [ "$GEN_LIST" != "false" ] || [ "$DESMAN" != "false" ]; then
+METHOD=1
+if [ "$GEN_LIST" != "false" ]; then
+GEN_LIS=$(readlink -f $GEN_LIST)
+fi
+if [ "$DESMAN" != "false" ]; then
+DESM=$(readlink -f $DESMAN)
+fi
+
+if ! [[ $MISMATCH =~ $INT ]] ; then
+    echo "-n [option] requires an integer"
+    exit 1
+fi
+else
+if ! [[ $SAT =~ $FLOAT ]] || [[ $SAT > 1 ]] || [[ $SAT < 0.1 ]]; then
+    echo "-t [option] requires a numeric value between 0.1 and 1"
+    exit 1
+fi
+if ! [[ $CLUS_DET =~ $FLOAT ]] || [[ $CLUS_DET > 1 ]] || [[ $CLUS_DET < 0.1 ]]; then
+    echo "-d [option] requires a numeric value between 0.1 and 1"
+    exit 1
+fi
+fi
+if ! [[ $MIN_SNP =~ $INT ]] ; then
+    echo "-u [option] requires an integer "
+    exit 1
+fi
+if ! [[ $NUM_THREAD =~ $INT ]] && [[ $NUM_THREAD != 1 ]] ; then
+    echo "-p [option] requires an integer"
+    exit 1
+fi
+
+if [[ $METHOD == 1 ]]; then
+if [ "$GEN_LIST" == "false" ] && [ "$DESMAN" == "false" ]; then
+    echo "Reference-based method (-m = 1) requires input from -g or -a flags"
+    exit 1 
+fi
+fi
+if [ "x" == "x$OUTPUT_DIR" ]; then
+  directory=$(echo "SMEG_growth_$(date +%s)")
+ mkdir $WDR/$directory
+ OUTPUT_DIR=$WDR/$directory
+fi
+if [ "$LIST" != "false" ]; then
+LIS=$(readlink -f $LIST)
+fi
+
+  shift $((OPTIND -1))
+    ;;
+esac
+###################################
+if [ "$package" != "build_species" ] && [ "$package" != "growth_est" ]; then
+  echo "Invalid option"
+  exit 1
+fi
+#############################################
+
+
+echo " ################ Checking for dependencies ########"
+requirements=$(echo "gcc parallel R Mauve roary prokka bowtie2 samtools bamtools bedtools blastn")
+for f in `echo $requirements` 
+do 
+toolCheck=$(type -P $f)
+if [ -z $toolCheck ]; then 
+echo "ERROR: $f missing" 
+echo "Check https://github.com/ohlab/SMEG for required packages"
+exit 1
+else
+echo "$f found"
+fi 
+done
+echo "All required packages found"
+echo " ################ Checking for required R libraries ########"
+###################3
+cd $WDR
+ODR=$(readlink -f $OUTPUT_DIR)
+mkdir -p $ODR
+
+if [ -z "$(ls -A $ODR)" ]; then
+   echo "Output directory ok"
+else
+   echo "ERROR: Output directory not Empty"
+   exit 1
+fi
+####################
+cd $ODR
+Rscript $SMEG_DIR/check_R_libraries.R
+if [ -s .checkedR ]; then
+echo "ERROR: The following R libraries are missing"
+cat .checkedR
+echo "Check https://github.com/ohlab/SMEG for required packages"
+rm .checkedR
+exit 1
+else
+echo "R libraries ok"
+rm .checkedR
+fi
+
+cd $SMEG_DIR
+##############################################
+###########################################
+if [ "$package" == "build_species" ]
+  then
+export GEN_DIR
+export ODR
+export NUM_THREAD
+export SAT
+export KEEP
+export CNT  
+export IGNORE_ITER
+export AUTO
+export SMEG_DIR
+export WDR
+export package
+export LIST
+export REF_ONLY
+export REPGEN
+if [ "$LIST" != "false" ]; then
+export LIS
+fi
+if [ "x$REPGEN" != "x" ]; then
+    export REPGEN
+fi
+
+./build_sp
+else
+export SMEG_DIR
+export WDR
+export package
+export SPECIES_DIR
+export NUM_THREAD
+export READS_DIR
+export ODR
+export METHOD
+export LIST
+export LIS
+export MIN_SNP
+export GEN_LIS
+export GEN_LIST
+export DESM
+export DESMAN
+export COV_CUTOFF
+export READS_EXT
+export CLUS_DET
+export SAT
+export MERGE
+export MISMATCH
+if  [[ $METHOD == 0 ]]; then
+./growth_est_denovo
+else
+./growth_est_ref
+fi
+fi