Skip to content

Commit aab27ee

Browse files
authored
add profiling cmd (#7)
* init minimap2_processing * fix test-1 * input -> artifact_id * return filepath vs template * adding tests
1 parent 986c534 commit aab27ee

16 files changed

+456
-88
lines changed

data/templates/0.mapping_minimap2_db.sbatch

Lines changed: 0 additions & 25 deletions
This file was deleted.

data/templates/1.hifiasm-meta_new.sbatch

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,12 @@
88
#SBATCH -o {{output}}/step-1/logs/%x-%A_%a.out
99
#SBATCH -e {{output}}/step-1/logs/%x-%A_%a.out
1010
#SBATCH --array {{array_params}}
11+
1112
source ~/.bashrc
13+
set -e
1214
conda activate {{conda_environment}}
13-
1415
cd {{output}}/step-1
16+
1517
step=${SLURM_ARRAY_TASK_ID}
1618
input=$(head -n $step {{output}}/sample_list.txt | tail -n 1)
1719

data/templates/2.get-circular-genomes.sbatch

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,8 @@
1010
#SBATCH --array {{array_params}}
1111

1212
source ~/.bashrc
13-
13+
set -e
1414
conda activate {{conda_environment}}
15-
1615
cd {{output}}/step-1
1716

1817
step=${SLURM_ARRAY_TASK_ID} ##1000_2, 1000_1

data/templates/3.minimap2_assembly.sbatch

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#SBATCH --array {{array_params}}
1111

1212
source ~/.bashrc
13+
set -e
1314
conda activate {{conda_environment}}
1415
cd {{output}}
1516

data/templates/4.metawrap_binning_new.sbatch

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@
1010
#SBATCH --array {{array_params}}
1111

1212
source ~/.bashrc
13+
set -e
1314
conda activate {{conda_environment}}
14-
1515
cd {{output}}
1616

1717
step=${SLURM_ARRAY_TASK_ID}

data/templates/5.DAS_Tools_prepare_batch3_test.sbatch

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
#SBATCH --array {{array_params}}
1111

1212
source ~/.bashrc
13-
13+
set -e
1414
conda activate {{conda_environment}}
1515
cd {{output}}
1616

@@ -35,4 +35,4 @@ Fasta_to_Contig2Bin.sh -i ./concoct_bins -e fa > ${sample_name}.concoct.tsv
3535
Fasta_to_Contig2Bin.sh -i ./maxbin2_bins -e fa > ${sample_name}.maxbin2.tsv
3636
Fasta_to_Contig2Bin.sh -i ./metabat2_bins -e fa > ${sample_name}.metabat2.tsv
3737

38-
DAS_Tool --bins=${sample_name}.concoct.tsv,${sample_name}.maxbin2.tsv,${sample_name}.metabat2.tsv --contigs={{output}}/step-2/${sample_name}_noLCG.fa --outputbasename={{output}}/${folder}/${sample_name}/${sample_name} --labels=CONCOCT,MaxBin,MetaBAT --threads={{nprocs}} --search_engine=diamond --dbDirectory=${DAS_db} --write_bins
38+
DAS_Tool --bins=${sample_name}.concoct.tsv,${sample_name}.maxbin2.tsv,${sample_name}.metabat2.tsv --contigs={{output}}/step-2/${sample_name}_noLCG.fa --outputbasename={{output}}/${folder}/${sample_name}/${sample_name} --labels=CONCOCT,MaxBin,MetaBAT --threads={{nprocs}} --search_engine=diamond --dbDirectory=${DAS_db} --write_bins

data/templates/6.MAG_rename.sbatch

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#SBATCH --array {{array_params}}
1111

1212
source ~/.bashrc
13+
set -e
1314
conda activate {{conda_environment}}
1415
cd {{output}}
1516

data/templates/7.checkm_batch.sbatch

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,8 @@
1111

1212

1313
source ~/.bashrc
14-
14+
set -e
1515
conda activate {{conda_environment}}
16-
1716
cd {{output}}
1817

1918
step=${SLURM_ARRAY_TASK_ID}
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
#!/bin/bash
#SBATCH -J {{job_name}}
#SBATCH -p qiita
#SBATCH -N {{node_count}}
#SBATCH -n {{nprocs}}
#SBATCH --time {{wall_time_limit}}
#SBATCH --mem {{mem_in_gb}}G
#SBATCH -o {{output}}/minimap2/logs/%x-%A_%a.out
#SBATCH -e {{output}}/minimap2/logs/%x-%A_%a.out
#SBATCH --array {{array_params}}

# Slurm array task: align one sample's reads against the minimap2 index
# in ${db}, coordinate-sort the alignments, blank out the SEQ/QUAL columns
# and store the result as {{output}}/alignments/<sample_name>.sam.xz.
#
# The sample handled by a task is line ${SLURM_ARRAY_TASK_ID} of
# {{output}}/sample_list.txt; the first whitespace-separated field is the
# sample name and the second is the path of the reads file.

source ~/.bashrc
set -e
conda activate {{conda_environment}}
# Enabled after the environment is activated so that only this script's own
# pipelines are affected: without it a failing minimap2/samtools stage would
# be masked by the exit status of the final xz.
set -o pipefail

mkdir -p "{{output}}/alignments"
cd "{{output}}/"
db=/ddn_scratch/qiita_t/working_dir/tmp/db/WoLr2.mmi

step=${SLURM_ARRAY_TASK_ID:?SLURM_ARRAY_TASK_ID is not set; run this as an array job}

# sed prints nothing when ${step} is past the end of the list, so an
# out-of-range task id is caught below instead of silently re-processing
# the last sample.
input=$(sed -n "${step}p" "{{output}}/sample_list.txt")

read -r sample_name filename _ <<< "${input}"
if [[ -z "${sample_name}" || -z "${filename}" ]]; then
    echo "No valid '<sample_name> <filename>' entry on line ${step} of {{output}}/sample_list.txt" >&2
    exit 1
fi

# -O sam: samtools sort writes BAM when sending to stdout unless told
# otherwise, but the awk stage below expects SAM text (header lines start
# with '@', columns 10 and 11 are SEQ and QUAL).
minimap2 -x map-hifi -t {{nprocs}} -a \
    --secondary=no --MD --eqx "${db}" \
    "${filename}" | \
    samtools sort -@ {{nprocs}} -O sam - | \
    awk 'BEGIN { FS=OFS="\t" } /^@/ { print; next } { $10="*"; $11="*" } 1' | \
    xz -1 -T1 > "{{output}}/alignments/${sample_name}.sam.xz"
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
#!/bin/bash
#SBATCH -J {{job_name}}
#SBATCH -p qiita
#SBATCH -N {{node_count}}
#SBATCH -n {{nprocs}}
#SBATCH --time {{wall_time_limit}}
#SBATCH --mem {{mem_in_gb}}G
#SBATCH -o {{output}}/merge/logs/%x-%A_%a.out
#SBATCH -e {{output}}/merge/logs/%x-%A_%a.out

# Post-alignment step: for every alignments/*.sam.xz under {{output}}
#   1. run `woltka classify` twice (rank "none" with coverage output, and
#      per-gene using ${coords});
#   2. collapse each per-gene table through the functional maps found in
#      ${functional_dir} (ko -> ec / reaction -> module -> pathway);
#   3. consolidate the coverage files with micov;
#   4. bundle the alignments into {{output}}/alignments.tar.
#
# NOTE(review): the log file pattern uses %A/%a although no --array is
# requested here -- confirm this is intended.
# NOTE(review): parallel is given -j {{node_count}} while the job asks for
# {{nprocs}} tasks -- confirm which value should bound the concurrency.

source ~/.bashrc
set -e
conda activate {{conda_environment}}
# Enabled after the environment is activated so that only this script's own
# pipelines are affected; makes a failure inside a `for ... | parallel`
# producer loop visible to `set -e`.
set -o pipefail
# Unmatched globs expand to nothing instead of the literal pattern.
shopt -s nullglob

cd "{{output}}/"
tax=/projects/wol/qiyun/wol2/databases/minimap2/WoLr2.tax
coords=/projects/wol/qiyun/wol2/databases/minimap2/WoLr2.coords
len_map=/projects/wol/qiyun/wol2/databases/minimap2/WoLr2/length.map
functional_dir=/projects/wol/qiyun/wol2/databases/minimap2/WoLr2

mkdir -p "{{output}}/coverages/"

alignments=(alignments/*.sam.xz)
if (( ${#alignments[@]} == 0 )); then
    echo "No alignments/*.sam.xz files found in {{output}}" >&2
    exit 1
fi

# Emit one command per line; parallel runs each line as a separate job and
# returns non-zero if any of them failed.
for f in "${alignments[@]}"; do
    sn=$(basename "${f}" .sam.xz)
    of="{{output}}/bioms/${sn}"
    mkdir -p "${of}"
    echo "woltka classify -i ${f} -o ${of}/none.biom --no-demux --lineage ${tax} --rank none --outcov {{output}}/coverages/"
    echo "woltka classify -i ${f} -o ${of}/per-gene.biom --no-demux -c ${coords}"
done | parallel -j {{node_count}}

# Each collapse consumes the output of an earlier one, so the steps are
# chained with && to stop at the first failure within a sample.
for f in bioms/*/per-gene.biom; do
    dn=$(dirname "${f}")
    echo "woltka collapse -i ${f} -m ${functional_dir}/orf-to-ko.map.xz -o ${dn}/ko.biom &&" \
         "woltka collapse -i ${dn}/ko.biom -m ${functional_dir}/ko-to-ec.map -o ${dn}/ec.biom &&" \
         "woltka collapse -i ${dn}/ko.biom -m ${functional_dir}/ko-to-reaction.map -o ${dn}/reaction.biom &&" \
         "woltka collapse -i ${dn}/reaction.biom -m ${functional_dir}/reaction-to-module.map -o ${dn}/module.biom &&" \
         "woltka collapse -i ${dn}/module.biom -m ${functional_dir}/module-to-pathway.map -o ${dn}/pathway.biom"
done | parallel -j {{node_count}}

# MISSING:
# merge bioms!

find "{{output}}/coverages/" -iname "*.cov" > "{{output}}/cov_files.txt"
micov consolidate --paths "{{output}}/cov_files.txt" --lengths "${len_map}" --output "{{output}}/coverages.tgz"

cd alignments
tar -cvf ../alignments.tar -- *.sam.xz

0 commit comments

Comments
 (0)