diff --git a/.travis.yml b/.travis.yml index ea4fd5c78..53005e346 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,16 +1,25 @@ language: python +env: + global: + - GATK_PATH=bin_bundles/GenomeAnalysisTK-3.3-0-g37228af + - NOVOALIGN_PATH=bin_bundles/novocraft_v3 + - PYTHONIOENCODING=UTF8 + - secure: l9tLtFKGNhaRdRN2N7Fiks63VatVCOtDUG7FI/pi7JNJu/EriTwDRlncoVCRCJZKOdxG8OrwC1BLX6CNqpVjJISEPGV/djsf2wCV9vi6oa+OsvMymsJAjOYkLezwRLVZp/0l/sGumPGz+q+XIM8VnkOZezIvZjGaaAtBpRTHdmA= + python: - 2.7 - 3.4 before_install: - - export PYTHONIOENCODING=UTF8 - wget http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh - chmod +x miniconda.sh - ./miniconda.sh -b - export PATH=/home/travis/miniconda/bin:$PATH - conda update --yes conda + - wget http://www.broadinstitute.org/~dpark/viral_ngs-gatk_novoalign-encrypted_for_travis.tar.gz.enc + - openssl aes-256-cbc -d -k "$BUNDLE_SECRET" -in viral_ngs-gatk_novoalign-encrypted_for_travis.tar.gz.enc -out bin_bundles.tar.gz + - tar -xzpvf bin_bundles.tar.gz install: - conda create -n env-conda --yes "python=$TRAVIS_PYTHON_VERSION" @@ -19,11 +28,9 @@ install: - pip install -q `cat requirements.txt | grep -v numpy | grep -v scipy | grep -vi cython` - pip install -q coveralls nose-cov -# command to run tests script: - python -m unittest -v test.test_tools.TestToolsInstallation - nosetests -v --with-xunit --with-coverage --cover-package broad_utils,assembly,interhost,intrahost,ncbi,read_utils,reports,taxon_filter,tools,util --cover-erase --cover-inclusive --cover-branches --cover-tests --nocapture -# post-tests after_success: - coveralls diff --git a/assembly.py b/assembly.py index 01fb70b4e..c7ac5c418 100755 --- a/assembly.py +++ b/assembly.py @@ -7,28 +7,42 @@ __author__ = "dpark@broadinstitute.org, rsealfon@broadinstitute.org" __commands__ = [] -import argparse, logging, random +import argparse, logging, random, os, os.path, shutil import Bio.AlignIO, Bio.SeqIO, Bio.Data.IUPACData import util.cmd, util.file, util.vcf +import read_utils, taxon_filter +import tools, tools.picard, tools.samtools, tools.gatk, tools.novoalign, tools.muscle log = logging.getLogger(__name__) -def assemble_trinity(inBam, clipDb, n_reads): +def assemble_trinity(inBam, outFasta, clipDb, n_reads=100000): ''' This step runs the Trinity assembler. First trim reads with trimmomatic, rmdup with prinseq, and random subsample to no more than 100k reads. ''' + infq = map(util.file.mkstempfname, ['.in.1.fastq', '.in.2.fastq']) + tools.picard.SamToFastqTool().execute(inBam, infq[0], infq[1]) + + trimfq = map(util.file.mkstempfname, ['.trim.1.fastq', '.trim.2.fastq']) + taxon_filter.trimmomatic(infq[0], infq[1], trimfq[0], trimfq[1], clipDb) + map(os.unlink(infq)) + + rmdupfq = map(util.file.mkstempfname, ['.rmdup.1.fastq', '.rmdup.2.fastq']) + read_utils.rmdup_prinseq_fastq(trimfq[0], trimfq[1], rmdupfq[0], rmdupfq[1]) + map(os.unlink(trimfq)) + + purgefq = map(util.file.mkstempfname, ['.fix.1.fastq', '.fix.2.fastq']) + read_utils.purge_unmated(rmdupfq[0], rmdupfq[1], purgefq[0], purgefq[1]) + map(os.unlink(rmdupfq)) + + raise NotImplementedError() ''' - shell("{config[binDir]}/read_utils.py bam_to_fastq {input} {params.tmpf_infq}") - shell("{config[binDir]}/taxon_filter.py trim_trimmomatic {params.tmpf_infq} {params.tmpf_trim} {params.clipDb}") - shell("{config[binDir]}/read_utils.py rmdup_prinseq_fastq {params.tmpf_trim} {params.tmpf_rmdup}") - shell("{config[binDir]}/read_utils.py purge_unmated {params.tmpf_rmdup} {output[1]} {output[2]}") shell("{config[binDir]}/tools/scripts/subsampler.py -n {params.n_reads} -mode p -in {output[1]} {output[2]} -out {params.tmpf_subsamp}") shell("reuse -q Java-1.6 && perl /idi/sabeti-scratch/kandersen/bin/trinity_old/Trinity.pl --CPU 1 --min_contig_length 300 --seqType fq --left {params.tmpf_subsamp[0]} --right {params.tmpf_subsamp[1]} --output {params.tmpd_trinity}") - shutil.copyfile(params.tmpd_trinity+"/Trinity.fasta", output[0]) ''' - raise NotImplementedError() + shutil.copyfile(os.path.join(params.tmpd_trinity,"Trinity.fasta"), outFasta) + return 0 def align_and_orient_vfat(inFasta, inReference, outFasta, minLength, minUnambig, replaceLength): ''' This step cleans up the Trinity assembly with a known reference genome. @@ -47,6 +61,7 @@ def align_and_orient_vfat(inFasta, inReference, outFasta, minLength, minUnambig, positions with two steps of read-based refinement (below), and revert positions back to Ns where read support is lacking. ''' + raise NotImplementedError() ''' shell("{config[binDir]}/tools/scripts/vfat/orientContig.pl {input[0]} {params.refGenome} {params.tmpf_prefix}") shell("{config[binDir]}/tools/scripts/vfat/contigMerger.pl {params.tmpf_prefix}_orientedContigs {params.refGenome} -readfq {input[1]} -readfq2 {input[2]} -fakequals 30 {params.tmpf_prefix}") @@ -57,15 +72,33 @@ def align_and_orient_vfat(inFasta, inReference, outFasta, minLength, minUnambig, shell("cat {output[0]} {params.refGenome} | /idi/sabeti-scratch/kandersen/bin/muscle/muscle -out {params.tmpf_muscle} -quiet") refName = first_fasta_header(params.refGenome) shell("{config[binDir]}/assembly.py modify_contig {params.tmpf_muscle} {output[1]} {refName} --name {params.renamed_prefix}{wildcards.sample} --call-reference-ns --trim-ends --replace-5ends --replace-3ends --replace-length {params.replace_length} --replace-end-gaps") - index_novoalign(output[1]) - shell("{config[binDir]}/read_utils.py index_fasta_picard {output[1]}") - shell("{config[binDir]}/read_utils.py index_fasta_samtools {output[1]}") ''' - raise NotImplementedError() + + # Align to known reference and impute missing sequences + muscle_align = util.file.mkstempfname('.muscle.fasta') + fastaheadername = "??" + main_modify_contig(parser_modify_contig().parse_args([ + muscle_align, outFasta, inReference, + '--name', fastaheadername, + '--call-reference-ns', '--trim-ends', + '--replace-5ends', '--replace-3ends', + '--replace-length', str(replaceLength), + '--replace-end-gaps', + ])) + + + # Index final output FASTA for Picard/GATK, Samtools, and Novoalign + tools.picard.CreateSequenceDictionaryTool().execute(outFasta, overwrite=True) + tools.samtools.SamtoolsTool().faidx(outFasta, overwrite=True) + tools.novoalign.NovoalignTool().index_fasta(outFasta) + return 0 + -def refine_assembly_with_reads(inFasta, inBam, outFasta, outVcf=None, outBam=None, novo_params=''): - ''' This a refinement step where we take the VFAT assembly, - align all reads back to it, and modify the assembly to the majority +def refine_assembly(inFasta, inBam, outFasta, + outVcf=None, outBam=None, novo_params='', min_coverage=2, + JVMmemory=None): + ''' This a refinement step where we take a crude assembly, align + all reads back to it, and modify the assembly to the majority allele at each position based on read pileups. This step considers both SNPs as well as indels called by GATK and will correct the consensus based on GATK calls. @@ -74,20 +107,85 @@ def refine_assembly_with_reads(inFasta, inBam, outFasta, outVcf=None, outBam=Non and realigned with GATK's IndelRealigner (in order to call indels). Output FASTA file is indexed for Picard, Samtools, and Novoalign. ''' - ''' - shell("{config[binDir]}/assembly.py deambig_fasta {input[0]} {output[0]}") - shell("{config[binDir]}/read_utils.py index_fasta_picard {output[0]}") - shell("{config[binDir]}/read_utils.py index_fasta_samtools {output[0]}") - novoalign(input[1], input[0], wildcards.sample, params.tmpf_bam1, options=params.novoalign_options, min_qual=1) - shell("{config[binDir]}/read_utils.py mkdup_picard {params.tmpf_bam1} {params.tmpf_bam2} --remove --picardOptions CREATE_INDEX=true") - gatk_local_realign(params.tmpf_bam2, output[0], output[1], params.tmpf_intervals) - gatk_ug(output[1], output[0], output[2]) - shell("{config[binDir]}/assembly.py vcf_to_fasta {output[2]} {output[3]} --trim_ends --min_coverage 2") - shell("{config[binDir]}/read_utils.py index_fasta_picard {output[3]}") - shell("{config[binDir]}/read_utils.py index_fasta_samtools {output[3]}") - index_novoalign(output[3]) - ''' - raise NotImplementedError() + # Get tools + picard_index = tools.picard.CreateSequenceDictionaryTool() + picard_mkdup = tools.picard.MarkDuplicatesTool() + samtools = tools.samtools.SamtoolsTool() + novoalign = tools.novoalign.NovoalignTool() + gatk = tools.gatk.GATKTool() + + # Create deambiguated genome for GATK + deambigFasta = util.file.mkstempfname('.deambig.fasta') + deambig_fasta(inFasta, deambigFasta) + picard_index.execute(deambigFasta, overwrite=True) + samtools.faidx(deambigFasta, overwrite=True) + + # Novoalign reads to self + novoBam = util.file.mkstempfname('.novoalign.bam') + novoalign.execute(inBam, inFasta, novoBam, + options=novo_params.split(), min_qual=1, JVMmemory=JVMmemory) + rmdupBam = util.file.mkstempfname('.rmdup.bam') + picard_mkdup.execute([novoBam], rmdupBam, + picardOptions=['REMOVE_DUPLICATES=true', 'CREATE_INDEX=true'], JVMmemory=JVMmemory) + os.unlink(novoBam) + realignBam = util.file.mkstempfname('.realign.bam') + gatk.local_realign(rmdupBam, deambigFasta, realignBam, JVMmemory=JVMmemory) + os.unlink(rmdupBam) + if outBam: + shutil.copyfile(realignBam, outBam) + + # Modify original assembly with VCF calls from GATK + tmpVcf = util.file.mkstempfname('.vcf.gz') + gatk.ug(realignBam, deambigFasta, tmpVcf, JVMmemory=JVMmemory) + os.unlink(realignBam) + os.unlink(deambigFasta) + main_vcf_to_fasta(parser_vcf_to_fasta().parse_args([ + tmpVcf, outFasta, '--trim_ends', '--min_coverage', str(min_coverage), + ])) + if outVcf: + shutil.copyfile(tmpVcf, outVcf) + if outVcf.endswith('.gz'): + shutil.copyfile(tmpVcf+'.tbi', outVcf+'.tbi') + os.unlink(tmpVcf) + + # Index final output FASTA for Picard/GATK, Samtools, and Novoalign + picard_index.execute(outFasta, overwrite=True) + samtools.faidx(outFasta, overwrite=True) + novoalign.index_fasta(outFasta) + return 0 + +def parser_refine_assembly(): + parser = argparse.ArgumentParser(description = refine_assembly.__doc__) + parser.add_argument('inFasta', + help='Input assembly, FASTA format, pre-indexed for Picard, Samtools, and Novoalign.') + parser.add_argument('inBam', + help='Input reads, BAM format.') + parser.add_argument('outFasta', + help='Output refined assembly, FASTA format, indexed for Picard, Samtools, and Novoalign.') + parser.add_argument('--outBam', + default=None, + help='Reads aligned to inFasta. Unaligned and duplicate reads have been removed. GATK indel realigned.') + parser.add_argument('--outVcf', + default=None, + help='GATK genotype calls for genome in inFasta coordinate space.') + parser.add_argument('--novo_params', + default='-r Random -l 40 -g 40 -x 20 -t 100', + help='Alignment parameters for Novoalign.') + parser.add_argument('--min_coverage', + default=3, type=int, + help='Minimum read coverage required to call a position unambiguous.') + parser.add_argument('--JVMmemory', + default=tools.gatk.GATKTool.jvmMemDefault, + help='JVM virtual memory size (default: %(default)s)') + util.cmd.common_args(parser, (('loglevel',None), ('version',None), ('tmpDir',None))) + return parser +def main_refine_assembly(args): + refine_assembly(args.inFasta, args.inBam, args.outFasta, + args.outVcf, args.outBam, args.novo_params, args.min_coverage, + JVMmemory=args.JVMmemory) + return 0 +__commands__.append(('refine_assembly', main_refine_assembly, parser_refine_assembly)) + @@ -532,6 +630,12 @@ def deambig_base(base): non-ambiguous base from among the possibilities ''' return random.choice(Bio.Data.IUPACData.ambiguous_dna_values[base.upper()]) +def deambig_fasta(inFasta, outFasta): + with util.file.open_or_gzopen(outFasta, 'wt') as outf: + with util.file.open_or_gzopen(inFasta, 'rt') as inf: + for record in Bio.SeqIO.parse(inf, 'fasta'): + for line in util.file.fastaMaker([(record.id, ''.join(map(deambig_base, str(record.seq))))]): + outf.write(line) def parser_deambig_fasta(): parser = argparse.ArgumentParser( description='''Take input sequences (fasta) and replace any ambiguity bases with a @@ -542,12 +646,7 @@ def parser_deambig_fasta(): util.cmd.common_args(parser, (('loglevel',None), ('version',None))) return parser def main_deambig_fasta(args): - with open(args.outFasta, 'wt') as outf: - with open(args.inFasta, 'rt') as inf: - for record in Bio.SeqIO.parse(inf, 'fasta'): - for line in util.file.fastaMaker([(record.id, ''.join(map(deambig_base, str(record.seq))))]): - outf.write(line) - log.info("done") + deambig_fasta(args.inFasta, args.outFasta) return 0 __commands__.append(('deambig_fasta', main_deambig_fasta, parser_deambig_fasta)) diff --git a/pipes/Snakefile b/pipes/Snakefile index 00fc33584..d756fab7b 100644 --- a/pipes/Snakefile +++ b/pipes/Snakefile @@ -14,6 +14,8 @@ configfile: "config.json" include: config["binDir"]+"/pipes/rules/common.rules" +set_env_vars() + include: config["binDir"]+"/pipes/rules/demux.rules" include: config["binDir"]+"/pipes/rules/hs_deplete.rules" include: config["binDir"]+"/pipes/rules/assembly.rules" diff --git a/pipes/config.json b/pipes/config.json index 883d62785..e6d1037a7 100644 --- a/pipes/config.json +++ b/pipes/config.json @@ -4,8 +4,6 @@ "samples_assembly": "samples-assembly.txt", "samples_per_run": "samples-runs.txt", - "deplete_bmtagger_nchunks": 4, - "deplete_blast_nchunks": 2, "bmTaggerDbDir": "/idi/sabeti-scratch/kandersen/references/bmtagger", "bmTaggerDbs_remove": [ "hg19", @@ -27,6 +25,11 @@ "ebov_2014": "", "ebov": "" }, + + "env_vars": { + "GATK_PATH": "/humgen/gsa-hpprojects/GATK/bin/GenomeAnalysisTK-3.3-0-g37228af", + "NOVOALIGN_PATH": "/idi/sabeti-scratch/kandersen/bin/novocraft_v3" + }, "subdirs": { "demux": "00_demux", diff --git a/pipes/rules/assembly.rules b/pipes/rules/assembly.rules index 3721f30d8..ecdda3ea6 100644 --- a/pipes/rules/assembly.rules +++ b/pipes/rules/assembly.rules @@ -137,7 +137,7 @@ rule align_and_orient: refName = first_fasta_header(params.refGenome) shell("{config[binDir]}/assembly.py modify_contig {params.tmpf_muscle} {output[1]} {refName} --name {params.renamed_prefix}{wildcards.sample} --call-reference-ns --trim-ends --replace-5ends --replace-3ends --replace-length {params.replace_length} --replace-end-gaps") assert_nonempty_file(output[1]) - index_novoalign(output[1]) + shell("{config[binDir]}/read_utils.py novoindex {output[1]}") shell("{config[binDir]}/read_utils.py index_fasta_picard {output[1]}") shell("{config[binDir]}/read_utils.py index_fasta_samtools {output[1]}") os.unlink(params.tmpf_muscle) @@ -156,33 +156,14 @@ rule refine_assembly_1: ''' input: config["tmpDir"] +'/'+config["subdirs"]["assembly"]+'/{sample}.assembly3-modify.fasta', config["dataDir"]+'/'+config["subdirs"]["per_sample"]+'/{sample}.cleaned.bam' - output: config["tmpDir"] +'/'+config["subdirs"]["assembly"]+'/{sample}.assembly3-deambig.fasta', - config["tmpDir"] +'/'+config["subdirs"]["assembly"]+'/{sample}.assembly3.bam', - config["tmpDir"] +'/'+config["subdirs"]["assembly"]+'/{sample}.assembly3.vcf.gz', - config["tmpDir"] +'/'+config["subdirs"]["assembly"]+'/{sample}.assembly4-refined.fasta' + output: config["tmpDir"] +'/'+config["subdirs"]["assembly"]+'/{sample}.assembly4-refined.fasta', + config["tmpDir"] +'/'+config["subdirs"]["assembly"]+'/{sample}.assembly3.vcf.gz' resources: mem=4 params: LSF='-W 4:00', logid="{sample}", novoalign_options = "-r Random -l 30 -g 40 -x 20 -t 502", - min_coverage = "2", - tmpf_intervals = config["tmpDir"] +'/'+config["subdirs"]["assembly"]+'/{sample}.assembly3.intervals', - tmpf_bam1 = config["tmpDir"] +'/'+config["subdirs"]["assembly"]+'/{sample}.assembly3_pre_rmdup.bam', - tmpf_bam2 = config["tmpDir"] +'/'+config["subdirs"]["assembly"]+'/{sample}.assembly3_pre_indel_realign.bam' - run: - update_timestamps(input) - shell("{config[binDir]}/assembly.py deambig_fasta {input[0]} {output[0]}") - shell("{config[binDir]}/read_utils.py index_fasta_picard {output[0]}") - shell("{config[binDir]}/read_utils.py index_fasta_samtools {output[0]}") - novoalign(input[1], input[0], wildcards.sample, params.tmpf_bam1, options=params.novoalign_options, min_qual=1) - shell("{config[binDir]}/read_utils.py mkdup_picard {params.tmpf_bam1} {params.tmpf_bam2} --remove --picardOptions CREATE_INDEX=true") - gatk_local_realign(params.tmpf_bam2, output[0], output[1], params.tmpf_intervals) - gatk_ug(output[1], output[0], output[2]) - shell("{config[binDir]}/assembly.py vcf_to_fasta {output[2]} {output[3]} --min_coverage {params.min_coverage} --trim_ends") - shell("{config[binDir]}/read_utils.py index_fasta_picard {output[3]}") - shell("{config[binDir]}/read_utils.py index_fasta_samtools {output[3]}") - index_novoalign(output[3]) - os.unlink(params.tmpf_bam1) - os.unlink(params.tmpf_bam2) + min_coverage = "2" + shell: "{config[binDir]}/assembly.py refine_assembly {input} {output[0]} --outVcf {output[1]} --min_coverage {params.min_coverage} --novo_params '{params.novoalign_options}'" rule refine_assembly_2: ''' This a second pass refinement step very similar to the first. @@ -197,33 +178,14 @@ rule refine_assembly_2: ''' input: config["tmpDir"] +'/'+config["subdirs"]["assembly"]+'/{sample}.assembly4-refined.fasta', config["dataDir"]+'/'+config["subdirs"]["per_sample"]+'/{sample}.raw.bam' - output: config["tmpDir"] +'/'+config["subdirs"]["assembly"]+'/{sample}.assembly4-deambig.fasta', - config["tmpDir"] +'/'+config["subdirs"]["assembly"]+'/{sample}.assembly4.bam', - config["tmpDir"] +'/'+config["subdirs"]["assembly"]+'/{sample}.assembly4.vcf.gz', - config["dataDir"]+'/'+config["subdirs"]["assembly"]+'/{sample}.fasta' + output: config["dataDir"]+'/'+config["subdirs"]["assembly"]+'/{sample}.fasta', + config["tmpDir"] +'/'+config["subdirs"]["assembly"]+'/{sample}.assembly4.vcf.gz' resources: mem=4 params: LSF='-W 4:00', logid="{sample}", novoalign_options = "-r Random -l 40 -g 40 -x 20 -t 100", - min_coverage = "3", - tmpf_intervals = config["tmpDir"] +'/'+config["subdirs"]["assembly"]+'/{sample}.assembly4.intervals', - tmpf_bam1 = config["tmpDir"] +'/'+config["subdirs"]["assembly"]+'/{sample}.assembly4_pre_rmdup.bam', - tmpf_bam2 = config["tmpDir"] +'/'+config["subdirs"]["assembly"]+'/{sample}.assembly4_pre_indel_realign.bam' - run: - update_timestamps(input) - shell("{config[binDir]}/assembly.py deambig_fasta {input[0]} {output[0]}") - shell("{config[binDir]}/read_utils.py index_fasta_picard {output[0]}") - shell("{config[binDir]}/read_utils.py index_fasta_samtools {output[0]}") - novoalign(input[1], input[0], wildcards.sample, params.tmpf_bam1, options=params.novoalign_options, min_qual=1) - shell("{config[binDir]}/read_utils.py mkdup_picard {params.tmpf_bam1} {params.tmpf_bam2} --remove --picardOptions CREATE_INDEX=true") - gatk_local_realign(params.tmpf_bam2, output[0], output[1], params.tmpf_intervals) - gatk_ug(output[1], output[0], output[2]) - shell("{config[binDir]}/assembly.py vcf_to_fasta {output[2]} {output[3]} --min_coverage {params.min_coverage} --trim_ends") - shell("{config[binDir]}/read_utils.py index_fasta_picard {output[3]}") - shell("{config[binDir]}/read_utils.py index_fasta_samtools {output[3]}") - index_novoalign(output[3]) - os.unlink(params.tmpf_bam1) - os.unlink(params.tmpf_bam2) + min_coverage = "3" + shell: "{config[binDir]}/assembly.py refine_assembly {input} {output[0]} --outVcf {output[1]} --min_coverage {params.min_coverage} --novo_params '{params.novoalign_options}'" rule map_reads_to_self: ''' After the final assembly is produced, we also produce BAM files with all reads @@ -242,14 +204,11 @@ rule map_reads_to_self: resources: mem=4 params: LSF='-W 4:00', logid="{sample}", - novoalign_options = "-r Random -l 40 -g 40 -x 20 -t 100 -k -c 3", - tmpf_intervals=config["tmpDir"]+'/'+config["subdirs"]["assembly"]+'/{sample}.aligned_to_self.intervals' + novoalign_options = "-r Random -l 40 -g 40 -x 20 -t 100 -k -c 3" run: - update_timestamps(input) makedirs(os.path.join(config["dataDir"], config["subdirs"]["align_self"])) - novoalign(input[1], input[0], wildcards.sample, output[0], options=params.novoalign_options) - filter_bam_mapped_only(output[0], output[1]) + shell("{config[binDir]}/read_utils.py novoalign {input[1]} {input[0]} {output[0]} --options '{params.novoalign_options}'") + shell("{config[binDir]}/read_utils.py filter_bam_mapped_only {output[0]} {output[1]}") shell("{config[binDir]}/read_utils.py mkdup_picard {output[1]} {output[2]} --remove --picardOptions CREATE_INDEX=true") - gatk_local_realign(output[2], input[0], output[3], params.tmpf_intervals) - os.unlink(params.tmpf_intervals) + shell("{config[binDir]}/read_utils.py gatk_realign {output[2]} {input[0]} {output[3]}") diff --git a/pipes/rules/common.rules b/pipes/rules/common.rules index 77ca5b507..52613aed4 100644 --- a/pipes/rules/common.rules +++ b/pipes/rules/common.rules @@ -1,5 +1,8 @@ - +def set_env_vars(): + import os + for k,v in config.get('env_vars', {}).items(): + os.environ[k] = v def read_tab_file(fname): with open(fname, 'rt') as inf: @@ -27,30 +30,3 @@ def update_timestamps(files): if os.path.isfile(f) and os.path.getmtime(f) > time.time(): print("input file %s is more recent than present, resetting its modification time to present" % f) os.utime(f) - - -# TO DO: everything below should get subsumed into the python codebase and removed from here. - -def index_novoalign(fasta): - outfname = fasta[:-6] + '.nix' - if os.path.isfile(outfname): - os.unlink(outfname) - shell("/idi/sabeti-scratch/kandersen/bin/novocraft/novoindex {outfname} {fasta} && chmod a-x {outfname}") - -def novoalign(inBam, refFasta, sample_name, outBam, options="-r Random", min_qual=0): - refFastaIdx = refFasta[:-6] + '.nix' - cmd = "/idi/sabeti-scratch/kandersen/bin/novocraft_v3/novoalign -f {inBam} {options} -F BAMPE -d {refFastaIdx} -o SAM " - if min_qual>0: - cmd += "| /idi/sabeti-data/software/samtools/samtools-0.1.19/samtools view -buS -q {min_qual} - " - cmd += "| java -Xmx2g -jar /seq/software/picard/1.802/bin/SortSam.jar SO=coordinate I=/dev/stdin O={outBam} CREATE_INDEX=true VALIDATION_STRINGENCY=SILENT" - shell(cmd) - -def gatk_ug(inBam, refFasta, outVcf, options="--min_base_quality_score 15 -ploidy 4"): - shell("java -Xmx2g -jar /humgen/gsa-hpprojects/GATK/bin/GenomeAnalysisTK-3.3-0-g37228af/GenomeAnalysisTK.jar -T UnifiedGenotyper -R {refFasta} -I {inBam} -o {outVcf} {options} -glm BOTH --baq OFF --useOriginalQualities -out_mode EMIT_ALL_SITES -dt NONE --num_threads 1 -stand_call_conf 0 -stand_emit_conf 0 -A AlleleBalance") - -def gatk_local_realign(inBam, refFasta, outBam, tmpIntervals): - shell("java -Xmx2g -jar /humgen/gsa-hpprojects/GATK/bin/GenomeAnalysisTK-3.3-0-g37228af/GenomeAnalysisTK.jar -T RealignerTargetCreator -R {refFasta} -o {tmpIntervals} -I {inBam}") - shell("java -Xmx2g -jar /humgen/gsa-hpprojects/GATK/bin/GenomeAnalysisTK-3.3-0-g37228af/GenomeAnalysisTK.jar -T IndelRealigner -R {refFasta} -targetIntervals {tmpIntervals} -I {inBam} -o {outBam}") - -def filter_bam_mapped_only(inBam, outBam): - shell("/idi/sabeti-data/software/samtools/samtools-0.1.19/samtools view -b -q 1 -u {inBam} | java -Xmx2g -jar /seq/software/picard/1.802/bin/SortSam.jar SO=coordinate I=/dev/stdin O={outBam} CREATE_INDEX=true VALIDATION_STRINGENCY=SILENT") diff --git a/pipes/rules/interhost.rules b/pipes/rules/interhost.rules index 127dc37ed..0f7ce11fa 100644 --- a/pipes/rules/interhost.rules +++ b/pipes/rules/interhost.rules @@ -37,17 +37,15 @@ rule map_reads_to_ref: params: LSF='-W 4:00', logid="{sample}", refGenome=config["ref_genome"], - tmpf_intervals=config["tmpDir"]+'/'+config["subdirs"]["assembly"]+'/{sample}.aligned_to_ref.intervals' + novoalign_options="-r Random -l 30 -g 40 -x 20 -t 502" run: - update_timestamps(input) makedirs(expand("{dir}/{subdir}", dir=[config["dataDir"], config["tmpDir"]], subdir=[config["subdirs"]["align_ref"], config["subdirs"]["assembly"]])) - novoalign(input[0], params.refGenome, wildcards.sample, output[0], options="-r Random -l 30 -g 40 -x 20 -t 502") - filter_bam_mapped_only(output[0], output[1]) + shell("{config[binDir]}/read_utils.py novoalign {input[0]} {params.refGenome} {output[0]} --options '{params.novoalign_options}'") + shell("{config[binDir]}/read_utils.py filter_bam_mapped_only {output[0]} {output[1]}") shell("{config[binDir]}/read_utils.py mkdup_picard {output[1]} {output[2]} --remove --picardOptions CREATE_INDEX=true") - gatk_local_realign(output[2], params.refGenome, output[3], params.tmpf_intervals) - os.unlink(params.tmpf_intervals) + shell("{config[binDir]}/read_utils.py gatk_realign {output[2]} {params.refGenome} {output[3]}") rule ref_guided_consensus: input: config["dataDir"]+'/'+config["subdirs"]["align_ref"]+'/{sample}.realigned.bam' @@ -110,8 +108,6 @@ rule multi_align_mafft: shell("/idi/sabeti-scratch/kandersen/bin/mafft/core/mafft --localpair --maxiterate 1000 --reorder --ep 0.123 --preservecase --thread 4 {input} > {params.tmpf_mafft}") shell("/idi/sabeti-scratch/kandersen/bin/trimal/trimal -phylip -automated1 -in {params.tmpf_mafft} -out {output} -htmlout {params.log_trimal} -colnumbering") update_timestamps(input) - os.unlink(params.tmpf_metrics) - os.unlink(params.tmpf_intervals) # Make sure all file-names are unique when cut down to 10 characters - e.g. if analysing Lassa 'LASV-' identifier needs to be removed from the input sequence file. diff --git a/pipes/rules/reports.rules b/pipes/rules/reports.rules index a0ffa9a26..1117b0d51 100644 --- a/pipes/rules/reports.rules +++ b/pipes/rules/reports.rules @@ -156,11 +156,12 @@ rule spikein_report: params: LSF='-W 4:00', logid="{sample}", spike_in_fasta=config["spikeinsDb"], + novoalign_options="-r Random", tmpf_spike_bam=config["tmpDir"]+'/'+config["subdirs"]["depletion"]+'/{sample}.cleaned.aligned_to_spikes.bam' run: makedirs(os.path.join(config["reportsDir"], 'spike_count')) makedirs(os.path.join(config["tmpDir"], config["subdirs"]["depletion"])) - novoalign(input, params.spike_in_fasta, wildcards.sample, params.tmpf_spike_bam, options="-r Random", min_qual=1) + shell("{config[binDir]}/read_utils.py novoalign {input} {params.spike_in_fasta} {params.tmpf_spike_bam} --options '{params.novoalign_options}'") shell("/idi/sabeti-scratch/kandersen/bin/scripts/CountAlignmentsByDescriptionLine -bam {params.tmpf_spike_bam} > {output}") os.unlink(params.tmpf_spike_bam) os.unlink(params.tmpf_spike_bam[:-1] + 'i') diff --git a/read_utils.py b/read_utils.py index bf7461a77..e8527f8be 100755 --- a/read_utils.py +++ b/read_utils.py @@ -15,6 +15,7 @@ import util.cmd, util.file from util.file import mkstempfname import tools.picard, tools.samtools, tools.mvicuna, tools.prinseq +import tools.novoalign, tools.gatk log = logging.getLogger(__name__) @@ -23,7 +24,7 @@ # *** purge_unmated *** # ======================= -def purge_unmated(inFastq1, inFastq2, outFastq1, outFastq2, regex) : +def purge_unmated(inFastq1, inFastq2, outFastq1, outFastq2, regex='^@(\S+)/[1|2]$') : """Use mergeShuffledFastqSeqs to purge unmated reads, and put corresponding reads in the same order.""" tempOutput = mkstempfname() @@ -702,7 +703,120 @@ def main_rmdup_prinseq_fastq(args): main_rmdup_prinseq_fastq, parser_rmdup_prinseq_fastq)) -# ======================= +def filter_bam_mapped_only(inBam, outBam, JVMmemory=None): + # filter to aligned-only with Samtools + tmp_bam = util.file.mkstempfname('.bam') + cmd = [tools.samtools.SamtoolsTool().install_and_get_path(), + 'view', '-b', '-1', '-q', '1', inBam] + log.debug(' '.join(cmd) +' > '+ tmp_bam) + with open(tmp_bam, 'wb') as outf: + subprocess.check_call(cmd, stdout=outf) + # fix headers and create index with Picard + tools.picard.SortSamTool().execute(tmp_bam, outBam, sort_order='coordinate', + picardOptions=['CREATE_INDEX=true', 'VALIDATION_STRINGENCY=SILENT'], + JVMmemory=JVMmemory) + os.unlink(tmp_bam) + return 0 +def parser_filter_bam_mapped_only(): + parser = argparse.ArgumentParser( + description='''Samtools and Picard to reduce a BAM file to aligned reads only.''') + parser.add_argument('inBam', + help='Input aligned reads, BAM format.') + parser.add_argument('outBam', + help='Output sorted indexed reads, filtered to aligned-only, BAM format.') + parser.add_argument('--JVMmemory', default = tools.picard.SortSamTool.jvmMemDefault, + help='JVM virtual memory size (default: %(default)s)') + util.cmd.common_args(parser, (('loglevel', None), ('version', None), ('tmpDir', None))) + return parser +def main_filter_bam_mapped_only(args): + filter_bam_mapped_only(args.inBam, args.outBam, JVMmemory=args.JVMmemory) + return 0 +__commands__.append(('filter_bam_mapped_only', + main_filter_bam_mapped_only, parser_filter_bam_mapped_only)) + + +# ======= Novoalign ======== + +def parser_novoalign() : + parser = argparse.ArgumentParser( + description='''Align reads with Novoalign. Sort and index BAM output.''') + parser.add_argument('inBam', help='Input reads, BAM format.') + parser.add_argument('refFasta', help='Reference genome, FASTA format, pre-indexed by Novoindex.') + parser.add_argument('outBam', help='Output reads, BAM format (aligned).') + parser.add_argument('--options', default = '-r Random', + help='Novoalign options (default: %(default)s)') + parser.add_argument('--min_qual', default = 0, + help='Filter outBam to minimum mapping quality (default: %(default)s)') + parser.add_argument('--JVMmemory', default = tools.picard.SortSamTool.jvmMemDefault, + help='JVM virtual memory size (default: %(default)s)') + util.cmd.common_args(parser, (('loglevel', None), ('version', None), ('tmpDir', None))) + return parser +def main_novoalign(args) : + novoalign = tools.novoalign.NovoalignTool() + novoalign.execute(args.inBam, args.refFasta, args.outBam, + options=args.options.split(), min_qual=args.min_qual, JVMmemory=args.JVMmemory) + return 0 +__commands__.append(('novoalign', main_novoalign, parser_novoalign)) + +def parser_novoindex() : + parser = argparse.ArgumentParser( + description='''Index a reference genome for use with Novoalign.''') + parser.add_argument('refFasta', help='Reference genome, FASTA format.') + util.cmd.common_args(parser, (('loglevel', None), ('version', None))) + return parser +def main_novoindex(args) : + tools.novoalign.NovoalignTool().index_fasta(args.refFasta) + return 0 +__commands__.append(('novoindex', main_novoindex, parser_novoindex)) + + +# ========= GATK ========== + +def parser_gatk_ug() : + parser = argparse.ArgumentParser( + description='''Call genotypes using the GATK UnifiedGenotyper.''') + parser.add_argument('inBam', + help='Input reads, BAM format.') + parser.add_argument('refFasta', + help='Reference genome, FASTA format, pre-indexed by Picard.') + parser.add_argument('outVcf', + help='''Output calls in VCF format. If this filename ends with .gz, + GATK will BGZIP compress the output and produce a Tabix index file as well.''') + parser.add_argument('--options', + default = '--min_base_quality_score 15 -ploidy 4', + help='UnifiedGenotyper options (default: %(default)s)') + parser.add_argument('--JVMmemory', default = tools.gatk.GATKTool.jvmMemDefault, + help='JVM virtual memory size (default: %(default)s)') + util.cmd.common_args(parser, (('loglevel', None), ('version', None), ('tmpDir', None))) + return parser +def main_gatk_ug(args) : + gatk = tools.gatk.GATKTool() + gatk.ug(args.inBam, args.refFasta, args.outVcf, + options=args.options.split(), JVMmemory=args.JVMmemory) + return 0 +__commands__.append(('gatk_ug', main_gatk_ug, parser_gatk_ug)) + +def parser_gatk_realign() : + parser = argparse.ArgumentParser( + description='''Local realignment of BAM files with GATK IndelRealigner.''') + parser.add_argument('inBam', + help='Input reads, BAM format, aligned to refFasta.') + parser.add_argument('refFasta', + help='Reference genome, FASTA format, pre-indexed by Picard.') + parser.add_argument('outBam', + help='Realigned reads.') + parser.add_argument('--JVMmemory', default = tools.gatk.GATKTool.jvmMemDefault, + help='JVM virtual memory size (default: %(default)s)') + util.cmd.common_args(parser, (('loglevel', None), ('version', None), ('tmpDir', None))) + return parser +def main_gatk_realign(args) : + tools.gatk.GATKTool().local_realign( + args.inBam, args.refFasta, args.outBam, JVMmemory=args.JVMmemory) + return 0 +__commands__.append(('gatk_realign', main_gatk_realign, parser_gatk_realign)) + + +# ========================= if __name__ == '__main__': util.cmd.main_argparse(__commands__, __doc__) diff --git a/test/input/TestToolNovoalign/ebola_expected.nix b/test/input/TestToolNovoalign/ebola_expected.nix new file mode 100644 index 000000000..395cd69ce Binary files /dev/null and b/test/input/TestToolNovoalign/ebola_expected.nix differ diff --git a/test/input/TestToolNovoalign/ebov_reads.bam b/test/input/TestToolNovoalign/ebov_reads.bam new file mode 100644 index 000000000..44536eed2 Binary files /dev/null and b/test/input/TestToolNovoalign/ebov_reads.bam differ diff --git a/test/test_tools_novoalign.py b/test/test_tools_novoalign.py new file mode 100644 index 000000000..217509c5f --- /dev/null +++ b/test/test_tools_novoalign.py @@ -0,0 +1,52 @@ +# Unit tests for Novoalign aligner + +__author__ = "dpark@broadinstitute.org" + +import unittest, os.path, shutil +import util.file, tools.novoalign +from test import TestCaseWithTmp + +class TestToolNovoalign(TestCaseWithTmp) : + + def setUp(self): + super(TestToolNovoalign, self).setUp() + self.novoalign = tools.novoalign.NovoalignTool() + self.novoalign.install() + + def test_index(self) : + orig_ref = os.path.join(util.file.get_test_input_path(), + 'ebola.fasta') + inRef = util.file.mkstempfname('.fasta') + shutil.copyfile(orig_ref, inRef) + self.novoalign.index_fasta(inRef) + outfile = inRef[:-6] + '.nix' + self.assertTrue(os.path.isfile(outfile)) + self.assertTrue(os.path.getsize(outfile)) + + def test_align(self) : + orig_ref = os.path.join(util.file.get_test_input_path(), + 'ebola.fasta') + inRef = util.file.mkstempfname('.fasta') + shutil.copyfile(orig_ref, inRef) + self.novoalign.index_fasta(inRef) + reads = os.path.join(util.file.get_test_input_path(self), + 'ebov_reads.bam') + outBam = util.file.mkstempfname('.bam') + self.novoalign.execute(reads, inRef, outBam) + self.assertTrue(os.path.isfile(outBam)) + self.assertTrue(os.path.getsize(outBam)) + self.assertTrue(os.path.isfile(outBam[:-1]+'i')) + + def test_align_filter(self) : + orig_ref = os.path.join(util.file.get_test_input_path(), + 'ebola.fasta') + inRef = util.file.mkstempfname('.fasta') + shutil.copyfile(orig_ref, inRef) + self.novoalign.index_fasta(inRef) + reads = os.path.join(util.file.get_test_input_path(self), + 'ebov_reads.bam') + outBam = util.file.mkstempfname('.bam') + self.novoalign.execute(reads, inRef, outBam, min_qual=1) + self.assertTrue(os.path.isfile(outBam)) + self.assertTrue(os.path.getsize(outBam)) + self.assertTrue(os.path.isfile(outBam[:-1]+'i')) diff --git a/tools/gatk.py b/tools/gatk.py index 0f15677a9..32f6cee0c 100644 --- a/tools/gatk.py +++ b/tools/gatk.py @@ -1,2 +1,73 @@ +''' + GATK genotyping toolkit from the Broad Institute + + This software has different licenses depending on use cases. + As such, we do not have an auto-downloader. The user must have GATK + pre-installed on their own and available in $GATK_PATH. +''' -# '/humgen/gsa-hpprojects/GATK/bin/GenomeAnalysisTK-3.3-0-g37228af' +import tools, tools.picard, tools.samtools, util.file +import logging, os, os.path, subprocess, tempfile + +log = logging.getLogger(__name__) + +class GATKTool(tools.Tool) : + jvmMemDefault = '2g' + def __init__(self, path=None): + self.tool_version = None + install_methods = [] + for jarpath in [path, os.environ.get('GATK_PATH')]: + if jarpath: + if not jarpath.endswith('.jar'): + jarpath = os.path.join(jarpath, 'GenomeAnalysisTK.jar') + install_methods.append(tools.PrexistingUnixCommand( + jarpath, verifycmd='java -jar %s --version' % jarpath, + verifycode=0, require_executability=False)) + tools.Tool.__init__(self, install_methods = install_methods) + + def execute(self, command, gatkOptions=[], JVMmemory=None) : + if JVMmemory==None: + JVMmemory = self.jvmMemDefault + toolCmd = ['java', + '-Xmx' + JVMmemory, + '-Djava.io.tmpdir=' + tempfile.tempdir, + '-jar', self.install_and_get_path(), + '-T', command] + list(map(str, gatkOptions)) + log.debug(' '.join(toolCmd)) + subprocess.check_call(toolCmd) + + def dict_to_gatk_opts(self, options) : + return ["%s=%s" % (k,v) for k,v in options.items()] + + def version(self): + if self.tool_version==None: + self._get_tool_version() + return self.tool_version + + def _get_tool_version(self): + cmd = ['java', '-jar', self.install_and_get_path(), '--version'] + self.tool_version = subprocess.check_output(cmd).strip() + + def ug(self, inBam, refFasta, outVcf, + options=["--min_base_quality_score", 15, "-ploidy", 4], + JVMmemory=None): + opts = ['-I', inBam, '-R', refFasta, '-o', outVcf, + '-glm', 'BOTH', + '--baq', 'OFF', + '--useOriginalQualities', + '-out_mode', 'EMIT_ALL_SITES', + '-dt', 'NONE', + '--num_threads', 1, + '-stand_call_conf', 0, + '-stand_emit_conf', 0, + '-A', 'AlleleBalance', + ] + self.execute('UnifiedGenotyper', opts + options, JVMmemory=JVMmemory) + + def local_realign(self, inBam, refFasta, outBam, JVMmemory=None): + intervals = util.file.mkstempfname('.intervals') + opts = ['-I', inBam, '-R', refFasta, '-o', intervals] + self.execute('RealignerTargetCreator', opts, JVMmemory=JVMmemory) + opts = ['-I', inBam, '-R', refFasta, '-targetIntervals', intervals, '-o', outBam] + self.execute('IndelRealigner', opts, JVMmemory=JVMmemory) + os.unlink(intervals) diff --git a/tools/novoalign.py b/tools/novoalign.py index 65eebe064..6259a94c3 100644 --- a/tools/novoalign.py +++ b/tools/novoalign.py @@ -1,13 +1,91 @@ -# /idi/sabeti-scratch/kandersen/bin/novocraft_v3/novoalign +''' + Novoalign aligner by Novocraft + + This is commercial software that has different licenses depending + on use cases. As such, we do not have an auto-downloader. The user + must have Novoalign pre-installed on their own and available + either in $PATH or $NOVOALIGN_PATH. +''' -# V2.08.03: X86-64 Linux http://www.novocraft.com/main/download.php?filename=V2.08.03/novocraftV2.08.03.gcc.tar.gz -# V2.08.03: Mac http://www.novocraft.com/main/download.php?filename=V2.08.03/novocraftV2.08.03.MacOSX.tar.gz +import tools, tools.picard, tools.samtools, util.file +import logging, os, os.path, subprocess, stat, gzip -# V3.02.02: X86-64 Linux 3.0 Kernel http://www.novocraft.com/main/download.php?filename=V3.02.02/novocraftV3.02.02.Linux3.0.tar.gz -# V3.02.02: X86-64 Linux 2.6 Kernel http://www.novocraft.com/main/download.php?filename=V3.02.02/novocraftV3.02.02.Linux2.6.tar.gz -# V2.02.02: Mac http://www.novocraft.com/main/download.php?filename=V3.02.02/novocraftV3.02.02.MacOSX.tar.gz +log = logging.getLogger(__name__) -def get_os_and_version() : - import os - uname = os.uname() - return uname[0], uname[2][:3] # e.g., (Darwin, 13.) or (Linux, 2.6) \ No newline at end of file +class NovoalignTool(tools.Tool) : + def __init__(self, path=None): + self.tool_version = None + install_methods = [] + for novopath in [path, os.environ.get('NOVOALIGN_PATH'), '']: + if novopath != None: + install_methods.append(tools.PrexistingUnixCommand( + os.path.join(novopath, 'novoalign'), + require_executability=True)) + tools.Tool.__init__(self, install_methods = install_methods) + + def version(self): + if self.tool_version==None: + self._get_tool_version() + return self.tool_version + + def _get_tool_version(self): + tmpf = util.file.mkstempfname('.novohelp.txt') + with open(tmpf, 'wt') as outf: + subprocess.call([self.install_and_get_path()], stdout=outf) + with open(tmpf, 'rt') as inf: + self.tool_version = inf.readline().strip().split()[1] + os.unlink(tmpf) + + def _fasta_to_idx_name(self, fasta): + if not fasta.endswith('.fasta'): + raise ValueError('input file %s must end with .fasta' % fasta) + return fasta[:-6] + '.nix' + + + def execute(self, inBam, refFasta, outBam, + options=["-r", "Random"], min_qual=0, JVMmemory=None): + ''' Execute Novoalign on BAM inputs and outputs. + Use Picard to sort and index the output BAM. + If min_qual>0, use Samtools to filter on mapping quality. + ''' + # Novoalign + tmp_sam = util.file.mkstempfname('.novoalign.sam') + cmd = [self.install_and_get_path(), '-f', inBam] + list(map(str, options)) + cmd = cmd + ['-F', 'BAMPE', '-d', self._fasta_to_idx_name(refFasta), '-o', 'SAM'] + log.debug(' '.join(cmd)) + with open(tmp_sam, 'wt') as outf: + subprocess.check_call(cmd, stdout=outf) + + # Samtools filter (optional) + if min_qual: + tmp_bam2 = util.file.mkstempfname('.filtered.bam') + samtools = tools.samtools.SamtoolsTool() + cmd = [samtools.install_and_get_path(), 'view', '-b', '-S', '-1', '-q', str(min_qual), tmp_sam] + log.debug('%s > %s' % (' '.join(cmd), tmp_bam2)) + with open(tmp_bam2, 'wb') as outf: + subprocess.check_call(cmd, stdout=outf) + os.unlink(tmp_sam) + tmp_sam = tmp_bam2 + + # Picard SortSam + sorter = tools.picard.SortSamTool() + sorter.execute(tmp_sam, outBam, sort_order='coordinate', + picardOptions=['CREATE_INDEX=true', 'VALIDATION_STRINGENCY=SILENT'], + JVMmemory=JVMmemory) + + + def index_fasta(self, fasta): + ''' Index a FASTA file (reference genome) for use with Novoalign. + The input file name must end in ".fasta". This will create a + new ".nix" file in the same directory. If it already exists, + it will be deleted and regenerated. + ''' + novoindex = os.path.join(os.path.dirname(self.install_and_get_path()), 'novoindex') + outfname = self._fasta_to_idx_name(fasta) + if os.path.isfile(outfname): + os.unlink(outfname) + cmd = [novoindex, outfname, fasta] + log.debug(' '.join(cmd)) + subprocess.check_call(cmd) + mode = os.stat(outfname).st_mode & ~stat.S_IXUSR & ~stat.S_IXGRP & ~stat.S_IXOTH + os.chmod(outfname, mode) diff --git a/util/version.py b/util/version.py index 89c24a644..c085393ed 100644 --- a/util/version.py +++ b/util/version.py @@ -6,21 +6,30 @@ import subprocess, os.path +def get_project_path() : + '''Return the absolute path of the top-level project, assumed to be the + parent of the directory containing this script.''' + # abspath converts relative to absolute path; expanduser interprets ~ + path = __file__ # path to this script + path = os.path.expanduser(path) # interpret ~ + path = os.path.abspath(path) # convert to absolute path + path = os.path.dirname(path) # containing directory: util + path = os.path.dirname(path) # containing directory: main project dir + return path + def call_git_describe(): try: - cmd = ['git', 'describe', '--tags', '--always', '--dirty'] + cmd = ['git', '-C', get_project_path(), 'describe', '--tags', '--always', '--dirty'] out = subprocess.check_output(cmd) if type(out) != str: out = out.decode('utf-8') - return out.strip() + ver = out.strip() except: - return None + ver = None + return ver def release_file(): - path = __file__ # path to this script - path = os.path.dirname(path) # containing directory: util - path = os.path.dirname(path) # containing directory: main project dir - return os.path.join(path, 'VERSION') # the VERSION file + return os.path.join(get_project_path(), 'VERSION') def read_release_version(): try: