Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add option for hmmratac to use BEDPE files as input #580

Merged
merged 1 commit into from
Aug 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 18 additions & 12 deletions MACS3/Commands/hmmratac_cmd.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Time-stamp: <2023-07-28 12:10:12 Tao Liu>
# Time-stamp: <2023-08-02 17:32:47 Tao Liu>

"""Description: Main HMMR command

Expand Down Expand Up @@ -26,7 +26,7 @@
from MACS3.Utilities.OptValidator import opt_validate_hmmratac
from MACS3.IO.PeakIO import PeakIO
from MACS3.IO.PeakIO import BroadPeakIO
from MACS3.IO.Parser import BAMPEParser #BAMaccessor
from MACS3.IO.Parser import BAMPEParser, BEDPEParser #BAMaccessor
from MACS3.Signal.HMMR_EM import HMMR_EM
from MACS3.Signal.HMMR_Signal_Processing import generate_weight_mapping, generate_digested_signals, extract_signals_from_regions
from MACS3.Signal.HMMR_HMM import hmm_training, hmm_predict, hmm_model_init, hmm_model_save
Expand Down Expand Up @@ -71,20 +71,26 @@ def run( args ):
cutoffanalysis_file = os.path.join( options.outdir, options.name+"_cutoff_analysis.tsv" )

#############################################
# 1. Read the input BAM files
# 1. Read the input files
#############################################
options.info("\n" + options.argtxt)
#options.info("Some important parameters for this run")
#options.info(f" binsize for HMM: {options.binsize} ()")
options.info("#1 Read fragments from BAM file...")

bam = BAMPEParser(options.bam_file[0], buffer_size=options.buffer_size)
petrack = bam.build_petrack()
if len( options.bam_file ) > 1:
if options.format == "BAMPE":
options.info("#1 Read fragments from BAMPE file...")
parser = BAMPEParser
elif options.format == "BEDPE":
options.info("#1 Read fragments from BEDPE file...")
parser = BEDPEParser
else:
raise Exception("wrong format")

alignment = parser(options.input_file[0], buffer_size=options.buffer_size)
petrack = alignment.build_petrack()
if len( options.input_file ) > 1:
# multiple input
for bamfile in options.bam_file[1:]:
bam = BAMPEParser(bamfile, buffer_size=options.buffer_size)
petrack = bam.append_petrack( petrack )
for inputfile in options.input_file[1:]:
alignment = parser(inputfile, buffer_size=options.buffer_size)
petrack = alignment.append_petrack( petrack )
# remember to finalize the petrack
petrack.finalize()

Expand Down
28 changes: 22 additions & 6 deletions bin/macs3
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/usr/bin/env python
# Time-stamp: <2023-07-28 12:14:37 Tao Liu>
# Time-stamp: <2023-08-02 17:49:12 Tao Liu>

"""Description: MACS v3 main executable.

Expand Down Expand Up @@ -759,7 +759,19 @@ around open chromatin regions.
Here's an example of how to run the `hmmratac` command:

```
$ macs3 hmmratac -b test/yeast_500k_SRR1822137.bam -n hmmratac_yeast500k
$ macs3 hmmratac -i yeast.bam -n yeast
```

or with the BEDPE format

```
$ macs3 hmmratac -i yeast.bedpe.gz -f BEDPE -n yeast
```

Note: you can convert BAMPE to BEDPE by using

```
$ macs3 filterdup --keep-dup all -f BAMPE -i yeast.bam -o yeast.bedpe
```

Before proceeding, it's essential to carefully read the help messages
Expand All @@ -774,11 +786,11 @@ crucial parameters we usually need to specify:

These three parameters can significantly influence the
results. Therefore, it's highly recommended to run `macs3 hmmratac
--cutoff-analysis-only -b your.bam` first, which will help you decide
--cutoff-analysis-only -i your.bam` first, which will help you decide
the parameters for `-l`, `-u`, and `-c`. Since there isn't an
automatic way to determine these parameters, your judgement will be
vital. Please read the output from `macs3 hmmratac
--cutoff-analysis-only -b your.bam` and the following section `Choices
--cutoff-analysis-only -i your.bam` and the following section `Choices
of cutoff values` for guidance.

* Choices of cutoff values *
Expand Down Expand Up @@ -877,8 +889,12 @@ plus an extra option for the HMM model file like `macs3 hmmratac

# group for input files
group_input = argparser_hmmratac.add_argument_group( "Input files arguments" )
group_input.add_argument( "-b", "--bam", dest = "bam_file", type = str, required = True, nargs = "+",
help = "BAM files containing the aligment results for ATAC-seq paired end reads. If multiple files are given as '-t A B C', then they will all be read and pooled together. The file should be in BAMPE format (aligned in paired end mode). REQUIRED." )
group_input.add_argument( "-i", "--input", dest = "input_file", type = str, required = True, nargs = "+",
help = "Input files containing the aligment results for ATAC-seq paired end reads. If multiple files are given as '-t A B C', then they will all be read and pooled together. The file should be in BAMPE or BEDPE format (aligned in paired end mode). Files can be gzipped. Note: all files should be in the same format! REQUIRED." )
group_input.add_argument( "-f", "--format", dest = "format", type = str,
choices = ("BAMPE", "BEDPE"),
help = "Format of input files, \"BAMPE\" or \"BEDPE\". If there are multiple files, they should be in the same format -- either BAMPE or BEDPE. Please check the definition in README. Also please note that the BEDPE only contains three columns -- chromosome, left position of the whole pair, right position of the whole pair-- and is NOT the same BEDPE format used by BEDTOOLS. To convert BAMPE to BEDPE, you can use this command `macs3 filterdup --keep-dup all -f BAMPE -i input.bam -o output.bedpe`. DEFAULT: \"BAMPE\"",
default = "BAMPE" )

# group for output files
group_output = argparser_hmmratac.add_argument_group( "Output arguments" )
Expand Down
11 changes: 7 additions & 4 deletions test/cmdlinetest
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/bash
# Time-stamp: <2022-06-06 12:13:43 Tao Liu>
# Time-stamp: <2023-08-02 17:59:43 Tao Liu>

# integrative subcmds testing

Expand Down Expand Up @@ -38,6 +38,7 @@ CTRLBIGSPEEDTEST=input_12878_5M.bed.gz
CALLVARPEAK=callvar_testing.narrowPeak

ATACSEQBAM=yeast_500k_SRR1822137.bam
ATACSEQBED=yeast_500k_SRR1822137.bedpe.gz

# callpeak
echo "1. callpeak"
Expand Down Expand Up @@ -211,16 +212,18 @@ macs3 randsample -i $CHIPCONTIGS50K -n 100000 --seed 31415926 --outdir ${OUTPUTD
echo "15. hmmratac save training regions bed file and model file" # model file is written by default
mkdir ${OUTPUTDIR_PREFIX}_run_hmmratac

macs3 hmmratac -b $ATACSEQBAM -n hmmratac_yeast500k --save-training-data --outdir ${OUTPUTDIR_PREFIX}_run_hmmratac &> ${OUTPUTDIR_PREFIX}_run_hmmratac/run_hmmratac.log
macs3 hmmratac -i $ATACSEQBAM -n hmmratac_yeast500k --save-training-data --outdir ${OUTPUTDIR_PREFIX}_run_hmmratac &> ${OUTPUTDIR_PREFIX}_run_hmmratac/run_hmmratac.log

macs3 hmmratac -i $ATACSEQBED -n hmmratac_yeast500k_bedpe -f BEDPE --outdir ${OUTPUTDIR_PREFIX}_run_hmmratac &> ${OUTPUTDIR_PREFIX}_run_hmmratac/run_hmmratac_bedpe.log

TRAINING_REGIONS_BED=${OUTPUTDIR_PREFIX}_run_hmmratac/hmmratac_yeast500k_training_regions.bed
HMM_MODEL=${OUTPUTDIR_PREFIX}_run_hmmratac/hmmratac_yeast500k_model.json

echo "15.1 hmmratac load training regions from bedfile"
macs3 hmmratac -b $ATACSEQBAM -n hmmratac_yeast500k_load_training_regions -t ${TRAINING_REGIONS_BED} --outdir ${OUTPUTDIR_PREFIX}_run_hmmratac &> ${OUTPUTDIR_PREFIX}_run_hmmratac/run_hmmratac_load_training_regions.log
macs3 hmmratac -i $ATACSEQBAM -n hmmratac_yeast500k_load_training_regions -t ${TRAINING_REGIONS_BED} --outdir ${OUTPUTDIR_PREFIX}_run_hmmratac &> ${OUTPUTDIR_PREFIX}_run_hmmratac/run_hmmratac_load_training_regions.log

echo "15.2 hmmratac load hmm model file"
macs3 hmmratac -b $ATACSEQBAM -n hmmratac_yeast500k_load_hmm_model --model ${HMM_MODEL} --outdir ${OUTPUTDIR_PREFIX}_run_hmmratac &> ${OUTPUTDIR_PREFIX}_run_hmmratac/run_hmmratac_load_hmm_model.log
macs3 hmmratac -i $ATACSEQBAM -n hmmratac_yeast500k_load_hmm_model --model ${HMM_MODEL} --outdir ${OUTPUTDIR_PREFIX}_run_hmmratac &> ${OUTPUTDIR_PREFIX}_run_hmmratac/run_hmmratac_load_hmm_model.log

echo "16. search for errors or warnings in log files"
flag=0
Expand Down
Loading