diff --git a/data/templates/0.mapping_minimap2_db.sbatch b/data/templates/0.mapping_minimap2_db.sbatch deleted file mode 100644 index c158c3a..0000000 --- a/data/templates/0.mapping_minimap2_db.sbatch +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/bash -#SBATCH -J {{job_name}} -#SBATCH -p qiita -#SBATCH -N {{node_count}} -#SBATCH -n {{nprocs}} -#SBATCH --time {{wall_time_limit}} -#SBATCH --mem {{mem_in_gb}}G -#SBATCH -o {{output}}/step-0/logs/%x-%A_%a.out -#SBATCH -e {{output}}/step-0/logs/%x-%A_%a.out -#SBATCH --array {{array_params}} - -source ~/.bashrc -conda activate {{conda_environment}} - - -cd {{output}}/step-0 -step=${SLURM_ARRAY_TASK_ID} ##1000_2, 1000_1 -input=$(head -n $step {{output}}/sample_list.txt | tail -n 1) - -sample_name=`echo $input | awk '{print $1}'` -filename=`echo $input | awk '{print $2}'` - -fn=`basename ${filename}` - -minimap2 -x map-hifi -t {{nprocs}} -a --secondary=no --MD --eqx /projects/wol/qiyun/wol2/databases/minimap2/WoLr2.mmi ${filename} | samtools sort -@ {{nprocs}} -o {{output}}/step-1/${sample_name}_WoLr2.sorted.sam \ No newline at end of file diff --git a/data/templates/1.hifiasm-meta_new.sbatch b/data/templates/1.hifiasm-meta_new.sbatch index febbea2..5b6c8c4 100644 --- a/data/templates/1.hifiasm-meta_new.sbatch +++ b/data/templates/1.hifiasm-meta_new.sbatch @@ -8,10 +8,12 @@ #SBATCH -o {{output}}/step-1/logs/%x-%A_%a.out #SBATCH -e {{output}}/step-1/logs/%x-%A_%a.out #SBATCH --array {{array_params}} + source ~/.bashrc +set -e conda activate {{conda_environment}} - cd {{output}}/step-1 + step=${SLURM_ARRAY_TASK_ID} input=$(head -n $step {{output}}/sample_list.txt | tail -n 1) diff --git a/data/templates/2.get-circular-genomes.sbatch b/data/templates/2.get-circular-genomes.sbatch index 65a7ba6..dadd114 100644 --- a/data/templates/2.get-circular-genomes.sbatch +++ b/data/templates/2.get-circular-genomes.sbatch @@ -10,9 +10,8 @@ #SBATCH --array {{array_params}} source ~/.bashrc - +set -e conda activate {{conda_environment}} - cd {{output}}/step-1 step=${SLURM_ARRAY_TASK_ID} ##1000_2, 1000_1 diff --git a/data/templates/3.minimap2_assembly.sbatch b/data/templates/3.minimap2_assembly.sbatch index f854ba3..36c14b6 100644 --- a/data/templates/3.minimap2_assembly.sbatch +++ b/data/templates/3.minimap2_assembly.sbatch @@ -10,6 +10,7 @@ #SBATCH --array {{array_params}} source ~/.bashrc +set -e conda activate {{conda_environment}} cd {{output}} diff --git a/data/templates/4.metawrap_binning_new.sbatch b/data/templates/4.metawrap_binning_new.sbatch index 66cd380..310f415 100644 --- a/data/templates/4.metawrap_binning_new.sbatch +++ b/data/templates/4.metawrap_binning_new.sbatch @@ -10,8 +10,8 @@ #SBATCH --array {{array_params}} source ~/.bashrc +set -e conda activate {{conda_environment}} - cd {{output}} step=${SLURM_ARRAY_TASK_ID} diff --git a/data/templates/5.DAS_Tools_prepare_batch3_test.sbatch b/data/templates/5.DAS_Tools_prepare_batch3_test.sbatch index f328ec2..2c175be 100755 --- a/data/templates/5.DAS_Tools_prepare_batch3_test.sbatch +++ b/data/templates/5.DAS_Tools_prepare_batch3_test.sbatch @@ -10,7 +10,7 @@ #SBATCH --array {{array_params}} source ~/.bashrc - +set -e conda activate {{conda_environment}} cd {{output}} @@ -35,4 +35,4 @@ Fasta_to_Contig2Bin.sh -i ./concoct_bins -e fa > ${sample_name}.concoct.tsv Fasta_to_Contig2Bin.sh -i ./maxbin2_bins -e fa > ${sample_name}.maxbin2.tsv Fasta_to_Contig2Bin.sh -i ./metabat2_bins -e fa > ${sample_name}.metabat2.tsv -DAS_Tool --bins=${sample_name}.concoct.tsv,${sample_name}.maxbin2.tsv,${sample_name}.metabat2.tsv --contigs={{output}}/step-2/${sample_name}_noLCG.fa --outputbasename={{output}}/${folder}/${sample_name}/${sample_name} --labels=CONCOCT,MaxBin,MetaBAT --threads={{nprocs}} --search_engine=diamond --dbDirectory=${DAS_db} --write_bins \ No newline at end of file +DAS_Tool --bins=${sample_name}.concoct.tsv,${sample_name}.maxbin2.tsv,${sample_name}.metabat2.tsv --contigs={{output}}/step-2/${sample_name}_noLCG.fa --outputbasename={{output}}/${folder}/${sample_name}/${sample_name} --labels=CONCOCT,MaxBin,MetaBAT --threads={{nprocs}} --search_engine=diamond --dbDirectory=${DAS_db} --write_bins diff --git a/data/templates/6.MAG_rename.sbatch b/data/templates/6.MAG_rename.sbatch index 6cf89a1..8748a04 100644 --- a/data/templates/6.MAG_rename.sbatch +++ b/data/templates/6.MAG_rename.sbatch @@ -10,6 +10,7 @@ #SBATCH --array {{array_params}} source ~/.bashrc +set -e conda activate {{conda_environment}} cd {{output}} diff --git a/data/templates/7.checkm_batch.sbatch b/data/templates/7.checkm_batch.sbatch index 8c0fb0f..d90dd29 100644 --- a/data/templates/7.checkm_batch.sbatch +++ b/data/templates/7.checkm_batch.sbatch @@ -11,9 +11,8 @@ source ~/.bashrc - +set -e conda activate {{conda_environment}} - cd {{output}} step=${SLURM_ARRAY_TASK_ID} diff --git a/data/templates/woltka_minimap2.sbatch b/data/templates/woltka_minimap2.sbatch new file mode 100644 index 0000000..7f01603 --- /dev/null +++ b/data/templates/woltka_minimap2.sbatch @@ -0,0 +1,32 @@ +#!/bin/bash +#SBATCH -J {{job_name}} +#SBATCH -p qiita +#SBATCH -N {{node_count}} +#SBATCH -n {{nprocs}} +#SBATCH --time {{wall_time_limit}} +#SBATCH --mem {{mem_in_gb}}G +#SBATCH -o {{output}}/minimap2/logs/%x-%A_%a.out +#SBATCH -e {{output}}/minimap2/logs/%x-%A_%a.out +#SBATCH --array {{array_params}} + +source ~/.bashrc +set -e +conda activate {{conda_environment}} +mkdir -p {{output}}/alignments +cd {{output}}/ +db=/ddn_scratch/qiita_t/working_dir/tmp/db/WoLr2.mmi + +step=${SLURM_ARRAY_TASK_ID} +input=$(head -n $step {{output}}/sample_list.txt | tail -n 1) + +sample_name=`echo $input | awk '{print $1}'` +filename=`echo $input | awk '{print $2}'` + +fn=`basename ${filename}` + +minimap2 -x map-hifi -t {{nprocs}} -a \ + --secondary=no --MD --eqx ${db} \ + ${filename} | \ + samtools sort -@ {{nprocs}} - | \ + awk 'BEGIN { FS=OFS="\t" } /^@/ { print; next } { $10="*"; $11="*" } 1' | \ + xz -1 -T1 > {{output}}/alignments/${sample_name}.sam.xz diff --git a/data/templates/woltka_minimap2_merge.sbatch b/data/templates/woltka_minimap2_merge.sbatch new file mode 100644 index 0000000..dc35979 --- /dev/null +++ b/data/templates/woltka_minimap2_merge.sbatch @@ -0,0 +1,49 @@ +#!/bin/bash +#SBATCH -J {{job_name}} +#SBATCH -p qiita +#SBATCH -N {{node_count}} +#SBATCH -n {{nprocs}} +#SBATCH --time {{wall_time_limit}} +#SBATCH --mem {{mem_in_gb}}G +#SBATCH -o {{output}}/merge/logs/%x-%A_%a.out +#SBATCH -e {{output}}/merge/logs/%x-%A_%a.out + +source ~/.bashrc +set -e +conda activate {{conda_environment}} +cd {{output}}/ +tax=/projects/wol/qiyun/wol2/databases/minimap2/WoLr2.tax +coords=/projects/wol/qiyun/wol2/databases/minimap2/WoLr2.coords +len_map=/projects/wol/qiyun/wol2/databases/minimap2/WoLr2/length.map +functional_dir=/projects/wol/qiyun/wol2/databases/minimap2/WoLr2/ + +mkdir -p {{output}}/coverages/ + +for f in `ls alignments/*.sam.xz`; do + sn=`basename ${f/.sam.xz/}`; + of={{output}}/bioms/${sn}; + mkdir -p ${of}; + echo "woltka classify -i ${f} -o ${of}/none.biom --no-demux --lineage ${tax} --rank none --outcov {{output}}/coverages/"; + echo "woltka classify -i ${f} -o ${of}/per-gene.biom --no-demux -c ${coords}"; +done | parallel -j {{node_count}} +wait + +for f in `ls bioms/*/per-gene.biom`; do + dn=`dirname ${f}`; + sn=`basename ${sn}`; + echo "woltka collapse -i ${f} -m ${functional_dir}/orf-to-ko.map.xz -o ${dn}/ko.biom; " \ + "woltka collapse -i ${dn}/ko.biom -m ${functional_dir}/ko-to-ec.map -o ${dn}/ec.biom; " \ + "woltka collapse -i ${dn}/ko.biom -m ${functional_dir}/ko-to-reaction.map -o ${dn}/reaction.biom; " \ + "woltka collapse -i ${dn}/reaction.biom -m ${functional_dir}/reaction-to-module.map -o ${dn}/module.biom; " \ + "woltka collapse -i ${dn}/module.biom -m ${functional_dir}/module-to-pathway.map -o ${dn}/pathway.biom;" +done | parallel -j {{node_count}} +wait + +# MISSING: +# merge bioms! + +find {{output}}/coverages/ -iname "*.cov" > {{output}}/cov_files.txt +micov consolidate --paths {{output}}/cov_files.txt --lengths ${len_map} --output {{output}}/coverages.tgz + +cd alignments +tar -cvf ../alignments.tar *.sam.xz diff --git a/qp_pacbio/__init__.py b/qp_pacbio/__init__.py index c809957..e8221ff 100644 --- a/qp_pacbio/__init__.py +++ b/qp_pacbio/__init__.py @@ -6,11 +6,43 @@ # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- from qiita_client import QiitaPlugin, QiitaCommand -from .qp_pacbio import pacbio_processing +from .qp_pacbio import pacbio_processing, minimap2_processing +from .util import plugin_details -__version__ = "2025.9" -plugin = QiitaPlugin("qp-pacbio", __version__, "PacBio Processing") +plugin = QiitaPlugin(**plugin_details) + +# +# minimap2 command +# + +req_params = {'artifact_id': ('integer', ['per_sample_FASTQ'])} +opt_params = dict() +outputs = { + # taxonomic + 'Per genome Predictions': 'BIOM', + 'Per gene Predictions': 'BIOM', + # functional + 'KEGG Ontology (KO)': 'BIOM', + 'KEGG Enzyme (EC)': 'BIOM', + 'KEGG Pathway': 'BIOM', + } +dflt_param_set = dict() + +minimap2_cmd = QiitaCommand( + "Woltka v0.1.7, minimap2", + "Functional and Taxonomic Predictions", + minimap2_processing, + req_params, + opt_params, + outputs, + dflt_param_set, +) +plugin.register_command(minimap2_cmd) + +# +# pacbio processing pipeline command +# req_params = { "artifact_id": ("integer", [None]), diff --git a/qp_pacbio/qp_pacbio.py b/qp_pacbio/qp_pacbio.py index 143b756..ddb0637 100644 --- a/qp_pacbio/qp_pacbio.py +++ b/qp_pacbio/qp_pacbio.py @@ -5,12 +5,16 @@ # # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- -from os import makedirs +from os import makedirs, mkdir from os.path import basename, join, exists, getmtime import pathlib +from shutil import copy2 + from jinja2 import Environment, BaseLoader, TemplateNotFound from qiita_client import ArtifactInfo +from subprocess import run + CONDA_ENV = "qp_pacbio_2025.9" MAX_WALL_1000 = 1000 @@ -18,6 +22,11 @@ T5_NAME = "5.DAS_Tools_prepare_batch3_test.sbatch" +def sbatch(args): + res = run(args, check=True, capture_output=True, text=True) + return res.stdout.strip() + + # taken from qp-woltka def search_by_filename(fname, lookup): if fname in lookup: @@ -67,8 +76,10 @@ def _write_slurm(path, template, **ctx): with open(out_fp, mode="w", encoding="utf-8") as f: f.write(rendered) + return out_fp + -def generate_templates(out_dir, job_id, njobs): +def pacbio_generate_templates(out_dir, job_id, njobs): """Generate Slurm submission templates for PacBio processing. Parameters @@ -82,21 +93,6 @@ def generate_templates(out_dir, job_id, njobs): """ jinja_env = Environment(loader=KISSLoader("../data/templates")) - # Step 0 - template0 = jinja_env.get_template("0.mapping_minimap2_db.sbatch") - _write_slurm( - join(out_dir, "step-0"), - template0, - conda_environment=CONDA_ENV, - output=out_dir, - job_name=f"s0-{job_id}", - node_count=1, - nprocs=16, - wall_time_limit=MAX_WALL_1000, - mem_in_gb=300, - array_params=f"1-{njobs}%16", - ) - # Step 1 template1 = jinja_env.get_template("1.hifiasm-meta_new.sbatch") _write_slurm( @@ -270,7 +266,7 @@ def pacbio_processing(qclient, job_id, parameters, out_dir): job_id, "Step 2 of 3: Creating submission templates", ) - generate_templates(out_dir, job_id, njobs) + pacbio_generate_templates(out_dir, job_id, njobs) # If/when you enable submission, capture Slurm job IDs and thread them: # jid0 = qclient.submit_job(f"{out_dir}/step-0/step-0.slurm") @@ -316,3 +312,130 @@ def pacbio_processing(qclient, job_id, parameters, out_dir): [ArtifactInfo("output", "job-output-folder", paths)], f"{job_id} completed.", ) + + +def minimap2_processing(qclient, job_id, parameters, out_dir): + """generates output for minimap2/woltka processing. + + Parameters + ---------- + qclient : tgp.qiita_client.QiitaClient + Qiita server client. + job_id : str + Job id. + parameters : dict + Parameters for this job. + out_dir : str + Output directory. + + Returns + ------- + bool, list, str + Results tuple for Qiita. + """ + qclient.update_job_step(job_id, "Commands finished") + + def _coverage_copy(dest): + fp_coverages = join(out_dir, 'coverages.tgz') + mkdir(dest) + dest = join(dest, 'coverages.tgz') + copy2(fp_coverages, dest) + + return dest + + errors = [] + ainfo = [] + + fp_biom = f'{out_dir}/none.biom' + fp_alng = f'{out_dir}/alignment.tar' + if exists(fp_biom) and exists(fp_alng): + ainfo.append(ArtifactInfo('Per genome Predictions', 'BIOM', [ + (fp_biom, 'biom'), (fp_alng, 'log'), + (_coverage_copy(f'{out_dir}/none/'), 'plain_text')])) + else: + errors.append('Table none/per-genome was not created, please contact ' + 'qiita.help@gmail.com for more information') + + bioms = [ + (f'{out_dir}/per-gene.biom', 'per_gene' 'Per gene Predictions'), + (f'{out_dir}/ko.biom', 'ko' 'KEGG Ontology (KO)'), + (f'{out_dir}/ec.biom', 'ec' 'KEGG Enzyme (EC)'), + (f'{out_dir}/pathway.biom', 'pathway' 'KEGG Pathway'), + ] + + for fb, fn, bn in bioms: + if exists(fb): + ainfo.append(ArtifactInfo(bn, 'BIOM', [ + (fb, 'biom'), + (_coverage_copy(f'{out_dir}/{fn}/'), 'plain_text')])) + else: + errors.append(f'Table "{bn}" was not created, please contact ' + 'qiita.help@gmail.com for more information') + + if errors: + return False, ainfo, '\n'.join(errors) + else: + + return True, ainfo, "" + + +def generate_minimap2_processing(qclient, job_id, out_dir, parameters): + """generates slurm scripts for minimap2/woltka processing. + + Parameters + ---------- + qclient : tgp.qiita_client.QiitaClient + Qiita server client. + job_id : str + Job id. + out_dir : str + Output directory. + parameters : dict + Parameters for this job. + + Returns + ------- + str, str + Returns the two filepaths of the slurm scripts + """ + qclient.update_job_step( + job_id, "Step 1 of 4: Collecting info and generating submission") + + artifact_id = parameters["artifact_id"] + + njobs = generate_sample_list(qclient, artifact_id, out_dir) + + qclient.update_job_step( + job_id, + "Step 2 of 4: Creating submission templates", + ) + + jinja_env = Environment(loader=KISSLoader("../data/templates")) + minimap2_template = jinja_env.get_template("woltka_minimap2.sbatch") + minimap2_script = _write_slurm( + f'{out_dir}/minimap2', + minimap2_template, + conda_environment=CONDA_ENV, + output=out_dir, + job_name=f"m2_{job_id}", + node_count=1, + nprocs=16, + wall_time_limit=MAX_WALL_1000, + mem_in_gb=120, + array_params=f"1-{njobs}%16", + ) + minimap2_merge_template = jinja_env.get_template( + "woltka_minimap2_merge.sbatch") + minimap2_merge_script = _write_slurm( + f'{out_dir}/merge', + minimap2_merge_template, + conda_environment=CONDA_ENV, + output=out_dir, + job_name=f"me_{job_id}", + node_count=1, + nprocs=16, + wall_time_limit=MAX_WALL_1000, + mem_in_gb=120 + ) + + return minimap2_script, minimap2_merge_script diff --git a/qp_pacbio/scripts.py b/qp_pacbio/scripts.py index 69bc07e..0e0338f 100644 --- a/qp_pacbio/scripts.py +++ b/qp_pacbio/scripts.py @@ -8,6 +8,9 @@ # ----------------------------------------------------------------------------- import click from qp_pacbio import plugin +from qp_pacbio.qp_pacbio import generate_minimap2_processing +from qp_pacbio.util import client_connect +from subprocess import run, PIPE @click.command() @@ -32,4 +35,23 @@ def config(env_script, ca_cert): @click.argument("output_dir", required=True) def execute(url, job_id, output_dir): """Executes the task given by job_id and puts the output in output_dir""" - plugin(url, job_id, output_dir) + if 'register' in job_id: + plugin(url, job_id, output_dir) + else: + qclient = client_connect(url) + job_info = qclient.get_job_info(job_id) + parameters = job_info['parameters'] + + main_fp, merge_fp = generate_minimap2_processing( + qclient, job_id, output_dir, parameters) + + # Submitting jobs and returning id + main_job = run(['sbatch', main_fp], stdout=PIPE) + main_job_id = main_job.stdout.decode('utf8').split()[-1] + merge_job = run(['sbatch', '-d', f'afterok:{main_job_id}', + merge_fp], stdout=PIPE) + merge_job_id = merge_job.stdout.decode('utf8').split()[-1] + print(f'{main_job_id}, {merge_job_id}') + + qclient.update_job_step( + job_id, "Step 2 of 4: Aligning sequences") diff --git a/qp_pacbio/tests/test_pacbio.py b/qp_pacbio/tests/test_pacbio.py index 30af8f7..c4d87a1 100644 --- a/qp_pacbio/tests/test_pacbio.py +++ b/qp_pacbio/tests/test_pacbio.py @@ -17,7 +17,7 @@ from qiita_client.testing import PluginTestCase from qp_pacbio import plugin -from qp_pacbio.qp_pacbio import pacbio_processing +from qp_pacbio.qp_pacbio import generate_minimap2_processing, pacbio_processing # Keep these in sync with your generator defaults CONDA_ENV = "qp_pacbio_2025.9" @@ -40,10 +40,12 @@ "#SBATCH -o {out_dir}/step-1/logs/%x-%A_%a.out\n" "#SBATCH -e {out_dir}/step-1/logs/%x-%A_%a.out\n" "#SBATCH --array 1-{njobs}%16\n" + "\n" "source ~/.bashrc\n" + "set -e\n" f"conda activate {CONDA_ENV}\n" - "\n" "cd {out_dir}/step-1\n" + "\n" "step=${{SLURM_ARRAY_TASK_ID}}\n" "input=$(head -n $step {out_dir}/sample_list.txt | tail -n 1)\n" "\n" @@ -112,6 +114,8 @@ def _insert_data(self): return aid + +class PacProcessingTests(PacBioTests): def test_pacbio_processing(self): params = {"artifact_id": self._insert_data()} job_id = "my-job-id" @@ -153,5 +157,122 @@ def test_pacbio_processing(self): self.assertCountEqual(ainfo, exp) +class PacWoltkaProfilingTests(PacBioTests): + def test_pacbio_processing(self): + params = {"artifact_id": int(self._insert_data())} + job_id = "my-job-id" + out_dir = mkdtemp() + self._clean_up_files.append(out_dir) + + # this should fail cause we don't have valid data + main_fp, merge_fp = generate_minimap2_processing( + self.qclient, job_id, out_dir, params) + with open(main_fp, 'r') as f: + obs_main = f.readlines() + with open(merge_fp, 'r') as f: + obs_merge = f.readlines() + + exp_main = [ + '#!/bin/bash\n', + '#SBATCH -J m2_my-job-id\n', + '#SBATCH -p qiita\n', + '#SBATCH -N 1\n', + '#SBATCH -n 16\n', + '#SBATCH --time 1000\n', + '#SBATCH --mem 120G\n', + f'#SBATCH -o {out_dir}/minimap2/logs/%x-%A_%a.out\n', + f'#SBATCH -e {out_dir}/minimap2/logs/%x-%A_%a.out\n', + '#SBATCH --array 1-2%16\n', + '\n', + 'source ~/.bashrc\n', + 'set -e\n', + 'conda activate qp_pacbio_2025.9\n', + f'mkdir -p {out_dir}/alignments\n', + f'cd {out_dir}/\n', + 'db=/ddn_scratch/qiita_t/working_dir/tmp/db/WoLr2.mmi\n', + '\n', + 'step=${SLURM_ARRAY_TASK_ID}\n', + f'input=$(head -n $step {out_dir}/sample_list.txt | tail -n 1)\n', + '\n', + "sample_name=`echo $input | awk '{print $1}'`\n", + "filename=`echo $input | awk '{print $2}'`\n", + '\n', + 'fn=`basename ${filename}`\n', + '\n', + 'minimap2 -x map-hifi -t 16 -a \\\n', + ' --secondary=no --MD --eqx ${db} \\\n', + ' ${filename} | \\\n', + ' samtools sort -@ 16 - | \\\n', + ' awk \'BEGIN { FS=OFS="\\t" } /^@/ { print; next } ' + '{ $10="*"; $11="*" } 1\' | \\\n', + f' xz -1 -T1 > {out_dir}/alignments/${{sample_name}}.sam.xz'] + self.assertEqual(obs_main, exp_main) + + db_path = '/projects/wol/qiyun/wol2/databases/minimap2' + exp_merge = [ + '#!/bin/bash\n', + '#SBATCH -J me_my-job-id\n', + '#SBATCH -p qiita\n', + '#SBATCH -N 1\n', + '#SBATCH -n 16\n', + '#SBATCH --time 1000\n', + '#SBATCH --mem 120G\n', + f'#SBATCH -o {out_dir}/merge/logs/%x-%A_%a.out\n', + f'#SBATCH -e {out_dir}/merge/logs/%x-%A_%a.out\n', + '\n', + 'source ~/.bashrc\n', + 'set -e\n', + 'conda activate qp_pacbio_2025.9\n', + f'cd {out_dir}/\n', + f'tax={db_path}/WoLr2.tax\n', + f'coords={db_path}/WoLr2.coords\n', + f'len_map={db_path}/WoLr2/length.map\n', + f'functional_dir={db_path}/WoLr2/\n', + '\n', + f'mkdir -p {out_dir}/coverages/\n', + '\n', + 'for f in `ls alignments/*.sam.xz`; do\n', + ' sn=`basename ${f/.sam.xz/}`;\n', + f' of={out_dir}/bioms/${{sn}};\n', + ' mkdir -p ${of};\n', + ' echo "woltka classify -i ${f} -o ${of}/none.biom ' + '--no-demux --lineage ${tax} ' + f'--rank none --outcov {out_dir}/coverages/";\n', + ' echo "woltka classify -i ${f} -o ${of}/per-gene.biom ' + '--no-demux -c ${coords}";\n', + 'done | parallel -j 1\n', + 'wait\n', + '\n', + 'for f in `ls bioms/*/per-gene.biom`; do\n', + ' dn=`dirname ${f}`;\n', + ' sn=`basename ${sn}`;\n', + ' echo "woltka collapse -i ${f} -m ${functional_dir}/' + 'orf-to-ko.map.xz -o ${dn}/ko.biom; " \\\n', + ' "woltka collapse -i ${dn}/ko.biom -m ${functional_dir}/' + 'ko-to-ec.map -o ${dn}/ec.biom; " \\\n', + ' "woltka collapse -i ${dn}/ko.biom -m ${functional_dir}/' + 'ko-to-reaction.map -o ${dn}/reaction.biom; " \\\n', + ' "woltka collapse -i ${dn}/reaction.biom -m ' + '${functional_dir}/reaction-to-module.map -o ' + '${dn}/module.biom; " \\\n', + ' "woltka collapse -i ${dn}/module.biom -m ' + '${functional_dir}/module-to-pathway.map ' + '-o ${dn}/pathway.biom;"\n', + 'done | parallel -j 1\n', + 'wait\n', + '\n', + '# MISSING:\n', + '# merge bioms!\n', + '\n', + f'find {out_dir}/coverages/ -iname "*.cov" > ' + f'{out_dir}/cov_files.txt\n', + f'micov consolidate --paths {out_dir}/cov_files.txt ' + f'--lengths ${{len_map}} --output {out_dir}/coverages.tgz\n', + '\n', + 'cd alignments\n', + 'tar -cvf ../alignments.tar *.sam.xz'] + self.assertEqual(obs_merge, exp_merge) + + if __name__ == "__main__": main() diff --git a/qp_pacbio/util.py b/qp_pacbio/util.py new file mode 100644 index 0000000..d6d1143 --- /dev/null +++ b/qp_pacbio/util.py @@ -0,0 +1,39 @@ +# ----------------------------------------------------------------------------- +# Copyright (c) 2025--, The Qiita Development Team. +# +# Distributed under the terms of the BSD 3-clause License. +# +# The full license is in the file LICENSE, distributed with this software. +# ----------------------------------------------------------------------------- +from os import environ +from os.path import join, expanduser +from configparser import ConfigParser + +from qiita_client import QiitaClient + + +plugin_details = {'name': 'qp-pacbio', + 'version': '2025.9', + 'description': 'PacBio processing'} + + +def client_connect(url): + name = plugin_details["name"] + version = plugin_details["version"] + + config = ConfigParser() + conf_dir = environ.get( + "QIITA_PLUGINS_DIR", join(expanduser("~"), ".qiita_plugins") + ) + conf_fp = join(conf_dir, f"{name}_{version}.conf") + + with open(conf_fp, "r", encoding="utf-8") as conf_file: + config.read_file(conf_file) + + qclient = QiitaClient( + url, + config.get("oauth2", "CLIENT_ID"), + config.get("oauth2", "CLIENT_SECRET"), + config.get("oauth2", "SERVER_CERT"), + ) + return qclient diff --git a/runner.py b/runner.py index 5b4f73f..bcb9eb1 100644 --- a/runner.py +++ b/runner.py @@ -6,12 +6,8 @@ # # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- -from configparser import ConfigParser -from os import environ -from os.path import expanduser, join - -from qp_pacbio.qp_pacbio import generate_sample_list, generate_templates -from qiita_client import QiitaClient +from qp_pacbio.qp_pacbio import generate_sample_list, pacbio_generate_templates +from qp_pacbio.util import client_connect import click from subprocess import run @@ -26,28 +22,6 @@ QIITA_URL = "https://qiita.ucsd.edu" -def client_connect(url): - name = plugin_details["name"] - version = plugin_details["version"] - - config = ConfigParser() - conf_dir = environ.get( - "QIITA_PLUGINS_DIR", join(expanduser("~"), ".qiita_plugins") - ) - conf_fp = join(conf_dir, f"{name}_{version}.conf") - - with open(conf_fp, "r", encoding="utf-8") as conf_file: - config.read_file(conf_file) - - qclient = QiitaClient( - url, - config.get("oauth2", "CLIENT_ID"), - config.get("oauth2", "CLIENT_SECRET"), - config.get("oauth2", "SERVER_CERT"), - ) - return qclient - - @click.command() @click.option("--artifact_id", help="artifact id", required=True) @click.option("--out_dir", help="output directory", required=True) @@ -59,10 +33,9 @@ def sbatch(args): qclient = client_connect(QIITA_URL) njobs = generate_sample_list(qclient, artifact_id, out_dir) - generate_templates(out_dir, job_id, njobs) + pacbio_generate_templates(out_dir, job_id, njobs) print(qclient.artifact_and_preparation_files(artifact_id)) - jid0 = sbatch(["sbatch", "--parsable", f"{out_dir}/step-0/step-0.slurm"]) jid1 = sbatch(["sbatch", "--parsable", f"{out_dir}/step-1/step-1.slurm"]) jid2 = sbatch( [ @@ -119,7 +92,7 @@ def sbatch(args): ] ) - submitted = [jid0, jid1, jid2, jid3, jid4, jid5, jid6, jid7] + submitted = [jid1, jid2, jid3, jid4, jid5, jid6, jid7] print("Submitted jobs: " + ", ".join(submitted))