Skip to content

Commit

Permalink
working optional beer (#63)
Browse files Browse the repository at this point in the history
* working optional beer
  • Loading branch information
jgallowa07 authored Jun 12, 2023
1 parent 73075fa commit 46e9dec
Show file tree
Hide file tree
Showing 54 changed files with 183 additions and 20,260 deletions.
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
# Nextflow
.nextflow*
**/*Attic*
**/*_ignore*


# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
Expand Down
5 changes: 1 addition & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,6 @@ A Nextflow pipeline for Common Phage Immuno-Precipitation Sequencing experiments
See the [Documentation](https://matsengrp.github.io/phippery/introduction.html)
for more details and usage examples.

[![nextflow]()]()
[![Build Status]()]()

## Quickstart

Install `Nextflow` by using the following command:
Expand All @@ -21,6 +18,6 @@ Launch the pipeline execution with the following command:

nextflow run matsengrp/phip-flow -profile docker

Note: the [Dockerfile](docker/Dockerfile) contains all the required dependencies.
Note: the ``phippery`` [Dockerfile](https://github.com/matsengrp/phippery/blob/main/Dockerfile) contains all the required dependencies.
Add the `-profile docker` to enable the containerized execution to the
example command line shown below.
47 changes: 47 additions & 0 deletions bin/run_BEER.Rscript
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#!/usr/bin/env Rscript

library(beer)
library(PhIPData)
library(dplyr)

# Input: the PhIPData object serialized by the upstream edgeR process.
edgeR_out <- readRDS("PhIPData.rds")

# For more on running BEER
# see http://www.bioconductor.org/packages/release/bioc/html/beer.html

### Named vector specifying where we want to store the summarized MCMC output
### NULL indicates that the output should not be stored.
print("Setting up BEER")
assay_locations <- c(
  phi = "beer_fc_marg",
  phi_Z = "beer_fc_cond",
  Z = "beer_prob",
  c = "sampleInfo",
  pi = "sampleInfo"
)

print("Running BEER::brew")
beer_out <- brew(edgeR_out, assay.names = assay_locations)

## Define matrix of peptides that were run in BEER
## (peptides x samples; a column is TRUE when that sample is not in the
## "beads" group, repeated for every peptide row).
print("Getting matrix of peptides that were run")
was_run <- matrix(
  rep(beer_out$group != "beads", each = nrow(beer_out)),
  nrow = nrow(beer_out)
)

## Identify super-enriched peptides
## These peptides were in samples that were run, but have missing posterior
## probabilities
print("Identifying super-enriched peptides")
are_se <- was_run & is.na(assay(beer_out, "beer_prob"))

## Enriched peptides are peptides with:
## - posterior probability > 0.5, OR
## - super-enriched peptides
## Entries with a missing probability that are not super-enriched stay NA.
print("Calling enriched peptides")
assay(beer_out, "beer_hits") <- assay(beer_out, "beer_prob") > 0.5 | are_se

write.csv(assays(beer_out)$beer_prob, file = "beer_prob.csv")
write.csv(assays(beer_out)$beer_hits, file = "beer_hits.csv")

# Persist the BEER-augmented object. The previous version saved `edgeR_out`
# here, which silently discarded every BEER assay from the emitted RDS.
# The output shares its name with the input; if the input was staged as a
# symlink (NOTE(review): confirm staging mode), writing through it would
# clobber the upstream file, so drop the staged entry before writing.
rds_path <- "PhIPData.rds"
if (file.exists(rds_path)) {
  unlink(rds_path)
}
saveRDS(beer_out, rds_path)
63 changes: 0 additions & 63 deletions data/conf/column_names.yaml

This file was deleted.

File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
10,048 changes: 0 additions & 10,048 deletions data/pan-cov-example-with-beads-no-lib/peptide_table.csv

This file was deleted.

Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
10,048 changes: 0 additions & 10,048 deletions data/pan-cov-example-with-beads/peptide_table.csv

This file was deleted.

Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
7 changes: 0 additions & 7 deletions data/pan-cov-example/sample_table.csv

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
technical_replicate_id,submitted_by,library_batch,control_status,sample_ID,sample_type,species,participant_ID,age,sex,race,days_from_symptom_onset,patient_status,source,pandemic_status,fastq_filepath
273,hannah,MEGSUB,library,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,data/pan-cov-example-with-beads/NGS/4B-rep1-27-library_S26_L001_R1_001.fastq.gz.test.gz
572,mackenzie,MEGSUB,beads_only,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,data/pan-cov-example-with-beads/NGS/expt10B-MEGSUB-4_S4_L001_R1_001.fastq.gz.test.gz
247,caitlin,SUB2,library,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,data/pan-cov-example-with-beads/NGS/4A-rep1-27-library_S27_L001_R1_001.fastq.gz.test.gz
725,caitlin,SUB2,beads_only,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,data/pan-cov-example-with-beads/NGS/ex11a-beads-35_S87_L001_R1_001.fastq.gz.test.gz
90,caitlin,MEGSUB,empirical,80,ACD plasma,human,32C,36.0,Male,White,30.0,conv outpatient 30d,Helen Chu,pandemic,data/pan-cov-example-with-beads/NGS/rep1-42_S42_L001_R1_001.fastq.gz.test.gz
382,caitlin,SUB2,empirical,80,ACD plasma,human,32C,36.0,Male,White,30.0,conv outpatient 30d,Helen Chu,pandemic,data/pan-cov-example-with-beads/NGS/ex8-rep2-42_S87_L001_R1_001.fastq.gz.test.gz
269,hannah,MEGSUB,empirical,45,serum,human,13a,NA,NA,NA,NA,healthy adult,Chu lab,pre,data/pan-cov-example-with-beads/NGS/4B-rep1-22_S22_L001_R1_001.fastq.gz.test.gz
242,caitlin,SUB2,empirical,45,serum,human,13a,NA,NA,NA,NA,healthy adult,Chu lab,pre,data/pan-cov-example-with-beads/NGS/4A-rep2-22_S49_L001_R1_001.fastq.gz.test.gz
273,hannah,MEGSUB,library,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,data/pan-cov-example/NGS/4B-rep1-27-library_S26_L001_R1_001.fastq.gz.test.gz
572,mackenzie,MEGSUB,beads_only,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,data/pan-cov-example/NGS/expt10B-MEGSUB-4_S4_L001_R1_001.fastq.gz.test.gz
247,caitlin,SUB2,library,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,data/pan-cov-example/NGS/4A-rep1-27-library_S27_L001_R1_001.fastq.gz.test.gz
725,caitlin,SUB2,beads_only,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,data/pan-cov-example/NGS/ex11a-beads-35_S87_L001_R1_001.fastq.gz.test.gz
90,caitlin,MEGSUB,empirical,80,ACD plasma,human,32C,36.0,Male,White,30.0,conv outpatient 30d,Helen Chu,pandemic,data/pan-cov-example/NGS/rep1-42_S42_L001_R1_001.fastq.gz.test.gz
382,caitlin,SUB2,empirical,80,ACD plasma,human,32C,36.0,Male,White,30.0,conv outpatient 30d,Helen Chu,pandemic,data/pan-cov-example/NGS/ex8-rep2-42_S87_L001_R1_001.fastq.gz.test.gz
269,hannah,MEGSUB,empirical,45,serum,human,13a,NA,NA,NA,NA,healthy adult,Chu lab,pre,data/pan-cov-example/NGS/4B-rep1-22_S22_L001_R1_001.fastq.gz.test.gz
242,caitlin,SUB2,empirical,45,serum,human,13a,NA,NA,NA,NA,healthy adult,Chu lab,pre,data/pan-cov-example/NGS/4A-rep2-22_S49_L001_R1_001.fastq.gz.test.gz
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
technical_replicate_id,submitted_by,library_batch,control_status,sample_ID,sample_type,species,participant_ID,age,sex,race,days_from_symptom_onset,patient_status,source,pandemic_status,fastq_filepath
572,mackenzie,MEGSUB,beads_only,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,data/pan-cov-example-with-beads-no-lib/NGS/expt10B-MEGSUB-4_S4_L001_R1_001.fastq.gz.test.gz
725,caitlin,SUB2,beads_only,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,data/pan-cov-example-with-beads-no-lib/NGS/ex11a-beads-35_S87_L001_R1_001.fastq.gz.test.gz
90,caitlin,MEGSUB,empirical,80,ACD plasma,human,32C,36.0,Male,White,30.0,conv outpatient 30d,Helen Chu,pandemic,data/pan-cov-example-with-beads-no-lib/NGS/rep1-42_S42_L001_R1_001.fastq.gz.test.gz
382,caitlin,SUB2,empirical,80,ACD plasma,human,32C,36.0,Male,White,30.0,conv outpatient 30d,Helen Chu,pandemic,data/pan-cov-example-with-beads-no-lib/NGS/ex8-rep2-42_S87_L001_R1_001.fastq.gz.test.gz
269,hannah,MEGSUB,empirical,45,serum,human,13a,NA,NA,NA,NA,healthy adult,Chu lab,pre,data/pan-cov-example-with-beads-no-lib/NGS/4B-rep1-22_S22_L001_R1_001.fastq.gz.test.gz
242,caitlin,SUB2,empirical,45,serum,human,13a,NA,NA,NA,NA,healthy adult,Chu lab,pre,data/pan-cov-example-with-beads-no-lib/NGS/4A-rep2-22_S49_L001_R1_001.fastq.gz.test.gz
572,mackenzie,MEGSUB,beads_only,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,data/pan-cov-example/NGS/expt10B-MEGSUB-4_S4_L001_R1_001.fastq.gz.test.gz
725,caitlin,SUB2,beads_only,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,data/pan-cov-example/NGS/ex11a-beads-35_S87_L001_R1_001.fastq.gz.test.gz
90,caitlin,MEGSUB,empirical,80,ACD plasma,human,32C,36.0,Male,White,30.0,conv outpatient 30d,Helen Chu,pandemic,data/pan-cov-example/NGS/rep1-42_S42_L001_R1_001.fastq.gz.test.gz
382,caitlin,SUB2,empirical,80,ACD plasma,human,32C,36.0,Male,White,30.0,conv outpatient 30d,Helen Chu,pandemic,data/pan-cov-example/NGS/ex8-rep2-42_S87_L001_R1_001.fastq.gz.test.gz
269,hannah,MEGSUB,empirical,45,serum,human,13a,NA,NA,NA,NA,healthy adult,Chu lab,pre,data/pan-cov-example/NGS/4B-rep1-22_S22_L001_R1_001.fastq.gz.test.gz
242,caitlin,SUB2,empirical,45,serum,human,13a,NA,NA,NA,NA,healthy adult,Chu lab,pre,data/pan-cov-example/NGS/4A-rep2-22_S49_L001_R1_001.fastq.gz.test.gz
4 changes: 4 additions & 0 deletions data/pan-cov-example/sample_table_with_beads_one_emp.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
technical_replicate_id,submitted_by,library_batch,control_status,sample_ID,sample_type,species,participant_ID,age,sex,race,days_from_symptom_onset,patient_status,source,pandemic_status,fastq_filepath
572,mackenzie,MEGSUB,beads_only,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,data/pan-cov-example/NGS/expt10B-MEGSUB-4_S4_L001_R1_001.fastq.gz.test.gz
725,caitlin,SUB2,beads_only,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,data/pan-cov-example/NGS/ex11a-beads-35_S87_L001_R1_001.fastq.gz.test.gz
90,caitlin,MEGSUB,empirical,80,ACD plasma,human,32C,36.0,Male,White,30.0,conv outpatient 30d,Helen Chu,pandemic,data/pan-cov-example/NGS/rep1-42_S42_L001_R1_001.fastq.gz.test.gz
4 changes: 2 additions & 2 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ nextflow.enable.dsl = 2
/*
* Define the default parameters - example data get's run by default
*/
params.sample_table = "$baseDir/data/pan-cov-example/sample_table.csv"
if (params.sample_table != "$baseDir/data/pan-cov-example/sample_table.csv")
params.sample_table = "$baseDir/data/pan-cov-example/sample_table_with_beads_and_lib.csv"
if (params.sample_table != "$baseDir/data/pan-cov-example/sample_table_with_beads_and_lib.csv")
params.reads_prefix = "$launchDir"
else
params.reads_prefix = "$baseDir"
Expand Down
7 changes: 5 additions & 2 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,10 @@ params{

run_cpm_enr_workflow = false
run_zscore_fit_predict = false
run_edgeR_save_rds = false
run_edgeR = true

// WARNING: This functionality has not been fully tested
run_BEER = false


/*
Expand Down Expand Up @@ -103,7 +106,7 @@ process {
// Default for any processes which do not match the selectors below
container = 'quay.io/hdc-workflows/phippery:1.1.4'

withName: run_edgeR {
withName: 'run_edgeR|run_BEER' {
container = 'quay.io/biocontainers/bioconductor-beer:1.2.0--r42hdfd78af_0'
}

Expand Down
67 changes: 0 additions & 67 deletions workflows/edgeR.nf

This file was deleted.

99 changes: 99 additions & 0 deletions workflows/edgeR_BEER.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
// Run external statistical analysis tools


// EXTRACT WIDE CSV
process to_csv {
    // Runs `phippery to-wide-csv` on the staged dataset and emits the dataset
    // file together with every CSV found in the task directory.
    // (`-o dataset` presumably sets the output file prefix -- confirm against
    // the phippery CLI.)
    input:
    path phip_data

    output:
    tuple path(phip_data), path("*.csv")

    script:
    """
    phippery to-wide-csv -o dataset $phip_data
    """
}

// RUN EDGER
process run_edgeR {
    // Invokes run_edgeR.Rscript with the configured threshold. Emits the
    // original dataset, the edgeR*.csv files, the PhIPData.rds object and the
    // method tag "edgeR" used downstream to name the result.
    // publishDir "$params.results/rds_data/", mode: 'copy', overwrite: true
    input:
    tuple path(phip_data), path(phip_data_csvs)

    output:
    tuple path(phip_data), path("edgeR*.csv"), path("PhIPData.rds"), val("edgeR")

    script:
    """
    run_edgeR.Rscript ${params.edgeR_threshold}
    """
}
//mv PhIPData.rds ${params.dataset_prefix}.rds

process run_BEER {
    // Invokes run_BEER.Rscript on the output tuple of run_edgeR. Incoming CSVs
    // are staged under their original names ("*"). Emits the dataset, the
    // beer*.csv files, PhIPData.rds and the method tag "BEER".
    // publishDir "$params.results/rds_data/", mode: 'copy', overwrite: true
    input:
    tuple path(phip_data), path("*"), path(edgeR_rds), val(method)

    output:
    tuple path(phip_data), path("beer*.csv"), path("PhIPData.rds"), val("BEER")

    script:
    """
    run_BEER.Rscript
    """
}

process publish_rds {
    // Pass-through process: its only job is to re-emit the RDS file so that
    // publishDir copies it into <results>/rds_data/.
    publishDir "$params.results/rds_data/", mode: 'copy', overwrite: true

    input:
    tuple path(phip_data), path(csvs), path(rds_data), val(method)

    output:
    path rds_data

    script:
    """
    echo publishing $rds_data
    """
}

// APPEND EDGER OR BEER RESULTS INTO XARRAY DATASET
process append_assay_csvs_to_xarray {
    // Loads the phippery dataset, adds one enrichment layer per staged CSV
    // (layer name = file name up to the first "."), and writes the result to
    // "<method>.phip".
    input:
    tuple path(phip_data), path(csvs), path(rds_data), val(method)

    output:
    path "${method}.phip"

    script:
    """
    #!/usr/bin/env python3
    import glob
    from phippery.utils import *
    import pandas as pd

    ds = load("$phip_data")
    for csv_file in glob.glob("*.csv"):
        # First column of each CSV is the index; the remaining values form the layer.
        layer_name = csv_file.split(".")[0]
        layer = pd.read_csv(csv_file, index_col=0)
        add_enrichment_layer_from_array(
            ds, layer.values, new_table_name=layer_name
        )
    dump(ds, "${method}.phip")
    """
}

workflow edgeR_BEER_workflows {
    // Exports the dataset to CSV, runs edgeR, optionally runs BEER on top of
    // the edgeR output (params.run_BEER), then both appends the resulting
    // assay CSVs to the dataset and publishes the RDS object.
    take:
    ds

    main:
    to_csv(ds)
    run_edgeR(to_csv.out)

    // Default to the edgeR results; replaced by BEER's output when enabled.
    assay_results = run_edgeR.out
    if ( params.run_BEER ) {
        run_BEER(run_edgeR.out)
        assay_results = run_BEER.out
    }

    // Both consumers read the same channel.
    append_assay_csvs_to_xarray(assay_results)
    publish_rds(assay_results)

    emit:
    append_assay_csvs_to_xarray.out
}


Loading

0 comments on commit 46e9dec

Please sign in to comment.