Skip to content

Commit

Permalink
Merge pull request #3 from raphael-group/cleanprep
Browse files Browse the repository at this point in the history
clean preprocessing and argparse
  • Loading branch information
Congm12 authored Jun 9, 2024
2 parents 119b31a + 9114ac2 commit b1ef7bb
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 88 deletions.
92 changes: 5 additions & 87 deletions calicost.smk
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ import calicost.parse_input
rule all:
input:
f"{config['output_snpinfo']}/cell_snp_Aallele.npz",
# expand(f"{config['outputdir_calicost']}/summary{{r}}", r=config['random_state'])


rule link_or_merge_bam:
Expand All @@ -26,8 +25,8 @@ rule link_or_merge_bam:
if "bamlist" in config:
# merged BAM file
shell(f"python {config['calicost_dir']}/utils/merge_bamfile.py -b {config['bamlist']} -o {params.outputdir}/ >> {log} 2>&1")
shell(f"{config['samtools']} sort -m {params.samtools_sorting_mem} -o {output.bam} {params.outputdir}/unsorted_possorted_genome_bam.bam >> {log} 2>&1")
shell(f"{config['samtools']} index {output.bam}")
shell(f"samtools sort -m {params.samtools_sorting_mem} -o {output.bam} {params.outputdir}/unsorted_possorted_genome_bam.bam >> {log} 2>&1")
shell(f"samtools index {output.bam}")
shell(f"rm -fr {params.outputdir}/unsorted_possorted_genome_bam.bam")

# merged barcodes
Expand Down Expand Up @@ -65,7 +64,7 @@ rule genotype:
"{outputdir}/logs/genotyping.log"
run:
shell(f"mkdir -p {params.outputdir}/genotyping")
command = f"{config['cellsnplite']} -s {input.bam} " + \
command = f"cellsnp-lite -s {input.bam} " + \
f"-b {input.barcodefile} " + \
f"-O {params.outputdir}/genotyping/ " + \
f"-R {params.region_vcf} " + \
Expand All @@ -89,8 +88,8 @@ rule pre_phasing:
print(f"python {config['calicost_dir']}/utils/filter_snps_forphasing.py -c {params.outputdir}/genotyping -o {params.outputdir}/phasing")
shell(f"python {config['calicost_dir']}/utils/filter_snps_forphasing.py -c {params.outputdir}/genotyping -o {params.outputdir}/phasing")
for chrname in config["chromosomes"]:
shell(f"{config['bgzip']} -f {params.outputdir}/phasing/chr{chrname}.vcf")
shell(f"{config['tabix']} -f {params.outputdir}/phasing/chr{chrname}.vcf.gz")
shell(f"bgzip -f {params.outputdir}/phasing/chr{chrname}.vcf")
shell(f"tabix -f {params.outputdir}/phasing/chr{chrname}.vcf.gz")


rule phasing:
Expand Down Expand Up @@ -131,84 +130,3 @@ rule parse_final_snp:
f"-c {params.outputdir}/genotyping -e {params.outputdir}/phasing -b {params.outputdir}/barcodes.txt -o {params.outputdir}/ >> {log} 2>&1"
shell( command )


# Write one CalicoST config file per entry in config['random_state'].
#
# Starts from the package's default configuration (joint defaults when the
# workflow config has a "bamlist", single-sample defaults otherwise), points it
# at the SNP directory and the output directory, copies over every workflow
# config value whose key also exists in the CalicoST defaults, and then writes
# "<outputdir>/configfile<r>" for each random state r.
rule write_calicost_configfile:
    input:
        f"{config['output_snpinfo']}/cell_snp_Aallele.npz",
        f"{config['output_snpinfo']}/cell_snp_Ballele.npz",
        f"{config['output_snpinfo']}/unique_snp_ids.npy",
    output:
        expand("{{outputdir}}" + "/configfile{r}", r=config['random_state'])
    params:
        outputdir="{outputdir}",
    threads: 1
    run:
        if "bamlist" in config:
            calicost_config = calicost.arg_parse.get_default_config_joint()
        else:
            calicost_config = calicost.arg_parse.get_default_config_single()

        # update input
        # snp_dir is the directory part of the first input file.
        calicost_config['snp_dir'] = "/".join( input[0].split("/")[:-1] )
        calicost_config['output_dir'] = f"{params.outputdir}"

        # Fail with an explicit message instead of a bare assert: asserts carry
        # no explanation and are removed when Python runs with -O.
        if 'spaceranger_dir' in calicost_config:
            if 'spaceranger_dir' not in config:
                raise ValueError(
                    "'spaceranger_dir' must be set in the workflow config "
                    "to write the CalicoST config file"
                )
            calicost_config['spaceranger_dir'] = config['spaceranger_dir']
        if 'input_filelist' in calicost_config:
            if 'bamlist' not in config:
                raise ValueError(
                    "'bamlist' must be set in the workflow config "
                    "to write the CalicoST config file"
                )
            calicost_config['input_filelist'] = config['bamlist']

        # Only reference the tumor-proportion table when it exists on disk.
        tumorprop_path = f"{config['output_snpinfo']}/merged_deconvolution.tsv"
        if Path(tumorprop_path).exists():
            calicost_config['tumorprop_file'] = tumorprop_path

        # Workflow config values take precedence over the values set above
        # for every key that CalicoST knows about.
        for k in calicost_config:
            if k in config:
                calicost_config[k] = config[k]

        # Each config file covers exactly one initialization: [r, r+1).
        for r in config['random_state']:
            calicost_config["num_hmrf_initialization_start"] = r
            calicost_config["num_hmrf_initialization_end"] = r+1
            calicost.arg_parse.write_config_file(f"{params.outputdir}/configfile{r}", calicost_config)


# Run parse_input.py on the first generated config file to produce the
# parsed_inputs tables/matrices and initial_phase.npz under {outputdir}.
rule prepare_calicost_data:
    input:
        expand("{{outputdir}}/configfile{r}", r=config["random_state"]),
    output:
        "{outputdir}/parsed_inputs/table_bininfo.csv.gz",
        "{outputdir}/parsed_inputs/table_rdrbaf.csv.gz",
        "{outputdir}/parsed_inputs/table_meta.csv.gz",
        "{outputdir}/parsed_inputs/exp_counts.pkl",
        "{outputdir}/parsed_inputs/adjacency_mat.npz",
        "{outputdir}/parsed_inputs/smooth_mat.npz",
        "{outputdir}/initial_phase.npz"
    params:
        outputdir="{outputdir}",
    threads: 1
    log:
        "{outputdir}/logs/prepare_calicost_data.log"
    run:
        # Only input[0] is passed to the script; stdout and stderr are
        # appended to the rule log. OMP_NUM_THREADS=1 matches `threads: 1`.
        shell(
            f"OMP_NUM_THREADS=1 python {config['calicost_dir']}/src/calicost/parse_input.py "
            f"-c {input[0]} >> {log} 2>&1"
        )


# Run calicost_main.py for a single random state r, using configfile{r} and
# the parsed inputs, then record the executed command in summary{r}.
rule run_calicost:
    input:
        "{outputdir}/configfile{r}",
        "{outputdir}/parsed_inputs/table_bininfo.csv.gz",
        "{outputdir}/parsed_inputs/table_rdrbaf.csv.gz",
        "{outputdir}/parsed_inputs/table_meta.csv.gz",
        "{outputdir}/parsed_inputs/exp_counts.pkl",
        "{outputdir}/parsed_inputs/adjacency_mat.npz",
        "{outputdir}/parsed_inputs/smooth_mat.npz"
    output:
        "{outputdir}/summary{r}",
    params:
        outputdir="{outputdir}",
        r="{r}"
    threads: 1
    log:
        "{outputdir}/logs/calicost_run_{r}.log"
    run:
        # input[0] is the per-run config file; stdout and stderr are appended
        # to the rule log.
        cmd = (
            f"OMP_NUM_THREADS=1 python {config['calicost_dir']}/src/calicost/calicost_main.py "
            f"-c {input[0]} >> {log} 2>&1"
        )
        shell(cmd)
        # NOTE(review): cmd still contains its own ">> log 2>&1" redirection,
        # so the shell applies that to echo as well; the trailing "> output"
        # is what sends the echoed text to the summary file. Confirm that
        # this is the intended content of the summary.
        shell(f"echo {cmd} > {output}")
4 changes: 3 additions & 1 deletion src/calicost/arg_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,9 @@ def read_configuration_file(filename):
# warning that the argument is not a valid configuration parameter and continue
logger.warning(f"{strs[0]} is not a valid configuration parameter! Configuration parameters are: {list(config.keys())}")
continue
if strs[1].upper() == "NONE":
if len(strs) == 1:
config[strs[0]] = []
elif strs[1].upper() == "NONE":
config[strs[0]] = None
elif argument_type[strs[0]] == "str":
config[strs[0]] = strs[1]
Expand Down

0 comments on commit b1ef7bb

Please sign in to comment.