-
Notifications
You must be signed in to change notification settings - Fork 9
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
b23bec2
commit f1364d0
Showing
3 changed files
with
201 additions
and
219 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,81 +1,159 @@ | ||
''' | ||
Creates a mip_ids folder and an allMipsSamplesNames.tab.txt file. Extracts mips,
corrects mips, and generates files that can be used to determine sample names as
well as sample names that had extractable data.
''' | ||
|
||
configfile: 'wrangler_by_sample.yaml' | ||
output=config['output_folder'] | ||
output='/opt/analysis' | ||
|
||
all_samples, all_targets=[],[] | ||
|
||
for line_number, line in enumerate(open(output+'/mip_ids/allMipsSamplesNames.tab.txt')): | ||
if line_number>0: | ||
line=line.rstrip().split('\t') | ||
if len(line)>1 and len(line[1])>0: | ||
all_samples.append(line[1]) | ||
if len(line[0])>0: | ||
all_targets.append(line[0]) | ||
|
||
final_dict={1: expand(output+'/analysis/{sample}/{sample}_mipExtraction/log.txt', sample=all_samples), | ||
2: expand(output+'/analysis/{sample}/{sample}_mipBarcodeCorrection/barcodeFilterStats.tab.txt', sample=all_samples), | ||
3: output+'/analysis/logs/mipCorrectForContamWithSameBarcodes_run1.json', | ||
4: expand(output+'/clustering_status/{sample}_mip_clustering_finished.txt', sample=all_samples), | ||
5: expand(output+'/analysis/populationClustering/{target}/analysis/log.txt', target=all_targets), | ||
6: output+'/allInfo.tsv.gz'} | ||
output_choice=config['output_choice'] | ||
final_out=final_dict[output_choice] | ||
|
||
rule all: | ||
input: | ||
setup_finished=output+'/setup_finished.txt', | ||
# good_samples=output+'/successfully_extracted_samples.txt', | ||
output_configfile=output+'/snakemake_params/wrangler_by_sample.yaml' | ||
final_out | ||
|
||
rule copy_files: | ||
rule extract_by_arm: | ||
input: | ||
setup_snakefile='setup_run.smk', | ||
finish_snakefile='finish_run.smk', | ||
input_configfile='wrangler_by_sample.yaml', | ||
in_scripts='scripts' | ||
This comment has been minimized.
Sorry, something went wrong. |
||
params: | ||
output_dir='/opt/analysis/analysis', | ||
# wrangler_dir=output, | ||
# fastq_dir=config['fastq_dir'] | ||
resources: | ||
time_min=240 | ||
output: | ||
setup_snakefile=output+'/snakemake_params/setup_run.smk', | ||
finish_snakefile=output+'/snakemake_params/finish_run.smk', | ||
output_configfile=output+'/snakemake_params/wrangler_by_sample.yaml', | ||
out_scripts=directory(output+'/snakemake_params/scripts') | ||
output+'/analysis/{sample}/{sample}_mipExtraction/log.txt' | ||
shell: | ||
''' | ||
cp {input.setup_snakefile} {output.setup_snakefile} | ||
cp {input.finish_snakefile} {output.finish_snakefile} | ||
cp {input.input_configfile} {output.output_configfile} | ||
cp -r {input.in_scripts} {output.out_scripts} | ||
MIPWrangler mipExtractByArm --masterDir {params.output_dir} --sample {wildcards.sample} --overWriteDirs --minCaptureLength=30 | ||
''' | ||
if config['downsample_umi_count']<2**32: | ||
rule mip_barcode_correction: | ||
input: | ||
good_samples=expand(output+'/analysis/{sample}/{sample}_mipExtraction/log.txt', sample=all_samples) | ||
params: | ||
output_dir='/opt/analysis/analysis', | ||
# wrangler_dir=output, | ||
# sif_file=config['miptools_sif'], | ||
downsample_seed=config['downsample_seed'], | ||
downsample_amount=config['downsample_umi_count'] | ||
resources: | ||
mem_mb=config['memory_mb_per_step'], | ||
time_min=20 | ||
output: | ||
barcode_corrections_finished=output+'/analysis/{sample}/{sample}_mipBarcodeCorrection/barcodeFilterStats.tab.txt' | ||
shell: | ||
''' | ||
MIPWrangler mipBarcodeCorrection --masterDir {params.output_dir} \ | ||
--downSampleAmount {params.downsample_amount} --downSampleSeed \ | ||
{params.downsample_seed} --overWriteDirs --sample {wildcards.sample} | ||
''' | ||
else: | ||
rule mip_barcode_correction: | ||
input: | ||
good_samples=expand(output+'/analysis/{sample}/{sample}_mipExtraction/log.txt', sample=all_samples) | ||
params: | ||
output_dir='/opt/analysis/analysis', | ||
# wrangler_dir=output, | ||
# sif_file=config['miptools_sif'], | ||
downsample_seed=config['downsample_seed'], | ||
resources: | ||
mem_mb=config['memory_mb_per_step'], | ||
time_min=20 | ||
output: | ||
barcode_corrections_finished=output+'/analysis/{sample}/{sample}_mipBarcodeCorrection/barcodeFilterStats.tab.txt' | ||
shell: | ||
''' | ||
MIPWrangler mipBarcodeCorrection --masterDir {params.output_dir} \ | ||
--doNotDownSample --downSampleSeed \ | ||
{params.downsample_seed} --overWriteDirs --sample {wildcards.sample} | ||
''' | ||
|
||
rule generate_mip_files: | ||
''' | ||
Given that I'm repackaging miptools wrangler (so wrangler.sh is not needed), | ||
that the existing generate_wrangler_scripts.py seems unnecessarily | ||
convoluted, and that only two files are needed by subsequent steps | ||
(mipArms.txt and allMipsSamplesNames.tab.txt), I wrote my own | ||
script for this. Input is an arms file and a sample sheet. Output is an arms | ||
file with rearranged columns and a two-column file with the names of all mips | ||
and the names of all samples (with no pairing between the columns of any given row). | ||
''' | ||
|
||
rule correct_for_same_barcode_contam: | ||
input: | ||
arms_file=config['project_resources']+'/mip_ids/mip_arms.txt', | ||
sample_sheet=config['input_sample_sheet'], | ||
fastq_folder=config['fastq_dir'] | ||
all_corrected=expand(output+'/analysis/{sample}/{sample}_mipBarcodeCorrection/barcodeFilterStats.tab.txt', sample=all_samples) | ||
params: | ||
sample_set=config['sample_set_used'], | ||
probe_sets=config['probe_sets_used'] | ||
output_dir='/opt/analysis/analysis', | ||
# wrangler_dir=output, | ||
# sif_file=config['miptools_sif'], | ||
resources: | ||
mem_mb=40000, | ||
time_min=1440, | ||
nodes=20 | ||
threads: 20 | ||
output: | ||
mip_arms=output+'/mip_ids/mipArms.txt', | ||
sample_file=output+'/mip_ids/allMipsSamplesNames.tab.txt', | ||
sample_sheet=output+'/sample_sheet.tsv' | ||
script: | ||
'scripts/generate_mip_files.py' | ||
#name is controlled by --logFile | ||
corrected_barcode_marker=output+'/analysis/logs/mipCorrectForContamWithSameBarcodes_run1.json' | ||
shell: | ||
''' | ||
MIPWrangler mipCorrectForContamWithSameBarcodesMultiple --masterDir {params.output_dir} --numThreads {threads} --overWriteDirs --overWriteLog --logFile mipCorrectForContamWithSameBarcodes_run1 | ||
''' | ||
|
||
rule setup: | ||
rule mip_clustering: | ||
input: | ||
mip_arms=output+'/mip_ids/mipArms.txt', | ||
sample_file=output+'/mip_ids/allMipsSamplesNames.tab.txt' | ||
corrected_barcode_marker=output+'/analysis/logs/mipCorrectForContamWithSameBarcodes_run1.json', | ||
#sample_dir=output+'/analysis/{sample}' | ||
params: | ||
output_dir='/opt/analysis/analysis', | ||
project_resources=config['project_resources'], | ||
wrangler_dir=output, | ||
sif_file=config['miptools_sif'], | ||
fastq_dir=config['fastq_dir'] | ||
# wrangler_dir=output, | ||
# sif_file=config['miptools_sif'] | ||
resources: | ||
mem_mb=config['memory_mb_per_step'], | ||
time_min=60, | ||
output: | ||
setup_finished=output+'/setup_finished.txt' | ||
threads: config['cpu_count'] | ||
mip_clustering=output+'/clustering_status/{sample}_mip_clustering_finished.txt' | ||
shell: | ||
''' | ||
singularity exec \ | ||
-B {params.project_resources}:/opt/project_resources \ | ||
-B {params.wrangler_dir}:/opt/analysis \ | ||
-B {params.fastq_dir}:/opt/data \ | ||
{params.sif_file} \ | ||
MIPWrangler mipSetup --mipArmsFilename /opt/analysis/mip_ids/mipArms.txt --mipSampleFile /opt/analysis/mip_ids/allMipsSamplesNames.tab.txt --numThreads {threads} --masterDir {params.output_dir} --dir /opt/data --mipServerNumber 1 | ||
touch {output.setup_finished} | ||
MIPWrangler mipClustering --masterDir {params.output_dir} --overWriteDirs --par /opt/resources/clustering_pars/illumina_collapseHomoploymers.pars.txt --countEndGaps --sample {wildcards.sample} | ||
touch {output.mip_clustering} | ||
''' | ||
|
||
rule pop_cluster_target: | ||
input: | ||
mip_cluster_files=expand(output+'/clustering_status/{sample}_mip_clustering_finished.txt', sample=all_samples) | ||
params: | ||
output_dir='/opt/analysis/analysis', | ||
# wrangler_dir=output, | ||
# sif_file=config['miptools_sif'] | ||
resources: | ||
mem_mb=config['memory_mb_per_step'], | ||
time_min=60, | ||
output: | ||
pop_clustering=output+'/analysis/populationClustering/{target}/analysis/log.txt' | ||
shell: | ||
''' | ||
MIPWrangler mipPopulationClustering --keepIntermediateFiles --masterDir {params.output_dir} --overWriteDirs --cutoff 0 --countEndGaps --fraccutoff 0.005 --mipName {wildcards.target} | ||
touch {output.pop_clustering} | ||
''' | ||
|
||
rule output_final_table: | ||
''' | ||
Concatenate the output files of the previous step into a final file, apply a | ||
"natural sort" so rows are ordered similarly to Nick's output, then gzip it. | ||
''' | ||
input: | ||
pop_clustering=expand(output+'/analysis/populationClustering/{target}/analysis/log.txt', target=all_targets) | ||
# final_sample_outputs=expand('/path/to/sample/outputs/{sample}.something', sample=sample_list) | ||
params: | ||
all_targets=all_targets, | ||
prefix=output+'/analysis/populationClustering/', | ||
suffix='/analysis/selectedClustersInfo.tab.txt.gz' | ||
resources: | ||
mem_mb=20000, | ||
time_min=480 | ||
output: | ||
final_table=output+'/allInfo.tsv.gz' | ||
script: | ||
'scripts/output_final_table.py' |
Oops, something went wrong.
To do: add the mip_ids folder (created in the setup_run step) as an official input to this rule, so that it checks whether the necessary input files (mipArms.txt and allMipsSamplesNames.tab.txt) exist and alerts the user if they are missing.