diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index 510bf1cb..1ea40b4d 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -1,7 +1,7 @@ name: nf-core AWS full size tests # This workflow is triggered on published releases. # It can be additionally triggered manually with GitHub actions workflow dispatch button. -# It runs the -profile 'test_full' on AWS batch +# It runs the -profile 'test_lfq' on AWS batch on: release: @@ -17,7 +17,7 @@ jobs: uses: nf-core/tower-action@v3 # TODO nf-core: You can customise AWS full pipeline tests as required # Add full size test data (but still relatively small datasets for few samples) - # on the `test_full.config` test runs with only one set of parameters + # on the `test_lfq.config` test runs with only one set of parameters with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} @@ -28,7 +28,7 @@ jobs: { "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/quantms/results-${{ github.sha }}" } - profiles: test_full,aws_tower + profiles: test_lfq,aws_tower nextflow_config: | process.errorStrategy = 'retry' process.maxRetries = 3 diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index eb46abf6..a6d99339 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -1,6 +1,6 @@ name: nf-core AWS test # This workflow can be triggered manually with the GitHub actions workflow dispatch button. 
-# It runs the -profile 'test' on AWS batch +# It runs the -profile 'test_tmt' on AWS batch on: workflow_dispatch: @@ -23,7 +23,7 @@ jobs: { "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/quantms/results-test-${{ github.sha }}" } - profiles: test,aws_tower + profiles: test_tmt,aws_tower nextflow_config: | process.errorStrategy = 'retry' process.maxRetries = 3 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b3393fea..799f83b8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -31,7 +31,7 @@ jobs: # Test latest edge release of Nextflow - NXF_VER: "" NXF_EDGE: "1" - test_profile: ["test", "test_lfq", "test_dia", "test_localize"] + test_profile: ["test_lfq", "test_dia", "test_localize", "test_tmt"] exec_profile: ["docker", "conda"] exclude: - test_profile: test_dia diff --git a/.nf-core.yml b/.nf-core.yml index 778ae193..c021bae9 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -2,3 +2,5 @@ repository_type: pipeline lint: files_exist: - conf/igenomes.config + - conf/test_full.config + - conf/test.config diff --git a/assets/PXD000001.sdrf.tsv b/assets/PXD000001.sdrf.tsv deleted file mode 100644 index f9ca1631..00000000 --- a/assets/PXD000001.sdrf.tsv +++ /dev/null @@ -1,7 +0,0 @@ -Source Name Characteristics[organism] Characteristics[organism part] Characteristics[age] Characteristics[ancestry category] Characteristics[developmental stage] Characteristics[cell line] Characteristics[cell type] Characteristics[sex] Characteristics[mass] Characteristics[spiked compound] Characteristics[spiked compound 2] Characteristics[spiked compound 3] Characteristics[spiked compound 4] Characteristics[disease] Characteristics[biological replicate] Material Type assay name technology type comment[data file] comment[file uri] comment[technical replicate] comment[fraction identifier] comment[label] comment[instrument] comment[modification parameters] comment[modification parameters] comment[modification parameters] comment[modification parameters] 
comment[cleavage agent details] comment[dissociation method] comment[precursor mass tolerance] comment[fragment mass tolerance] Factor Value[spiked compound] Factor Value[spiked compound] Factor Value[spiked compound] Factor Value[spiked compound] -Sample 1 Dickeya chrysanthemi whole plant not available not available not available not applicable not available not applicable 1 SP=Yeast;CT=protein;AC=P00924;QY=10 SP=BOVIN;CT=protein;AC=P02769;QY=1 SP=RABIT;CT=protein;AC=P00489;QY=2 SP=BOVIN;CT=protein;AC=P62894;QY=1 not available 1 cell run 1 proteomic profiling by mass spectrometry TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01.raw ftp://ftp.pride.ebi.ac.uk/pride-archive/2012/03/PXD000001/TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01.raw 1 1 TMT126 NT=LTQ Orbitrap Velos;AC=MS:1001742 NT=Oxidation;MT=Variable;TA=M;AC=Unimod:35 NT=Methylthio;TA=C;MT=fixed;AC=UNIMOD:39 NT=TMT6plex;TA=K;MT=Fixed;AC=UNIMOD:737 NT=TMT6plex;PP=Any N-term;MT=Fixed;AC=UNIMOD:737 AC=MS:1001313;NT=Trypsin NT=HCD;AC=PRIDE:0000590 not available not available SP=Yeast;CT=protein;AC=P00924;QY=10 SP=BOVIN;CT=protein;AC=P02769;QY=1 SP=RABIT;CT=protein;AC=P00489;QY=2 SP=BOVIN;CT=protein;AC=P62894;QY=1 -Sample 2 Dickeya chrysanthemi whole plant not available not available not available not applicable not available not applicable 1 SP=Yeast;CT=protein;AC=P00924;QY=5 SP=BOVIN;CT=protein;AC=P02769;QY=2.5 SP=RABIT;CT=protein;AC=P00489;QY=2 SP=BOVIN;CT=protein;AC=P62894;QY=1 not available 1 cell run 1 proteomic profiling by mass spectrometry TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01.raw ftp://ftp.pride.ebi.ac.uk/pride-archive/2012/03/PXD000001/TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01.raw 1 1 TMT127 NT=LTQ Orbitrap Velos;AC=MS:1001742 NT=Oxidation;MT=Variable;TA=M;AC=Unimod:35 NT=Methylthio;TA=C;MT=fixed;AC=UNIMOD:39 NT=TMT6plex;TA=K;MT=Fixed;AC=UNIMOD:737 NT=TMT6plex;PP=Any N-term;MT=Fixed;AC=UNIMOD:737 AC=MS:1001313;NT=Trypsin NT=HCD;AC=PRIDE:0000590 not available 
not available SP=Yeast;CT=protein;AC=P00924;QY=5 SP=BOVIN;CT=protein;AC=P02769;QY=2.5 SP=RABIT;CT=protein;AC=P00489;QY=2 SP=BOVIN;CT=protein;AC=P62894;QY=1 -Sample 3 Dickeya chrysanthemi whole plant not available not available not available not applicable not available not applicable 1 SP=Yeast;CT=protein;AC=P00924;QY=2.5 SP=BOVIN;CT=protein;AC=P02769;QY=5 SP=RABIT;CT=protein;AC=P00489;QY=2 SP=BOVIN;CT=protein;AC=P62894;QY=1 not available 1 cell run 1 proteomic profiling by mass spectrometry TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01.raw ftp://ftp.pride.ebi.ac.uk/pride-archive/2012/03/PXD000001/TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01.raw 1 1 TMT128 NT=LTQ Orbitrap Velos;AC=MS:1001742 NT=Oxidation;MT=Variable;TA=M;AC=Unimod:35 NT=Methylthio;TA=C;MT=fixed;AC=UNIMOD:39 NT=TMT6plex;TA=K;MT=Fixed;AC=UNIMOD:737 NT=TMT6plex;PP=Any N-term;MT=Fixed;AC=UNIMOD:737 AC=MS:1001313;NT=Trypsin NT=HCD;AC=PRIDE:0000590 not available not available SP=Yeast;CT=protein;AC=P00924;QY=2.5 SP=BOVIN;CT=protein;AC=P02769;QY=5 SP=RABIT;CT=protein;AC=P00489;QY=2 SP=BOVIN;CT=protein;AC=P62894;QY=1 -Sample 4 Dickeya chrysanthemi whole plant not available not available not available not applicable not available not applicable 1 SP=Yeast;CT=protein;AC=P00924;QY=1 SP=BOVIN;CT=protein;AC=P02769;QY=10 SP=RABIT;CT=protein;AC=P00489;QY=2 SP=BOVIN;CT=protein;AC=P62894;QY=1 not available 1 cell run 1 proteomic profiling by mass spectrometry TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01.raw ftp://ftp.pride.ebi.ac.uk/pride-archive/2012/03/PXD000001/TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01.raw 1 1 TMT129 NT=LTQ Orbitrap Velos;AC=MS:1001742 NT=Oxidation;MT=Variable;TA=M;AC=Unimod:35 NT=Methylthio;TA=C;MT=fixed;AC=UNIMOD:39 NT=TMT6plex;TA=K;MT=Fixed;AC=UNIMOD:737 NT=TMT6plex;PP=Any N-term;MT=Fixed;AC=UNIMOD:737 AC=MS:1001313;NT=Trypsin NT=HCD;AC=PRIDE:0000590 not available not available SP=Yeast;CT=protein;AC=P00924;QY=1 SP=BOVIN;CT=protein;AC=P02769;QY=10 
SP=RABIT;CT=protein;AC=P00489;QY=2 SP=BOVIN;CT=protein;AC=P62894;QY=1 -Sample 5 Dickeya chrysanthemi whole plant not available not available not available not applicable not available not applicable 1 SP=Yeast;CT=protein;AC=P00924;QY=2.5 SP=BOVIN;CT=protein;AC=P02769;QY=5 SP=RABIT;CT=protein;AC=P00489;QY=1 SP=BOVIN;CT=protein;AC=P62894;QY=1 not available 1 cell run 1 proteomic profiling by mass spectrometry TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01.raw ftp://ftp.pride.ebi.ac.uk/pride-archive/2012/03/PXD000001/TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01.raw 1 1 TMT130 NT=LTQ Orbitrap Velos;AC=MS:1001742 NT=Oxidation;MT=Variable;TA=M;AC=Unimod:35 NT=Methylthio;TA=C;MT=fixed;AC=UNIMOD:39 NT=TMT6plex;TA=K;MT=Fixed;AC=UNIMOD:737 NT=TMT6plex;PP=Any N-term;MT=Fixed;AC=UNIMOD:737 AC=MS:1001313;NT=Trypsin NT=HCD;AC=PRIDE:0000590 not available not available SP=Yeast;CT=protein;AC=P00924;QY=2.5 SP=BOVIN;CT=protein;AC=P02769;QY=5 SP=RABIT;CT=protein;AC=P00489;QY=1 SP=BOVIN;CT=protein;AC=P62894;QY=1 -Sample 6 Dickeya chrysanthemi whole plant not available not available not available not applicable not available not applicable 1 SP=Yeast;CT=protein;AC=P00924;QY=10 SP=BOVIN;CT=protein;AC=P02769;QY=1 SP=RABIT;CT=protein;AC=P00489;QY=1 SP=BOVIN;CT=protein;AC=P62894;QY=2 not available 1 cell run 1 proteomic profiling by mass spectrometry TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01.raw ftp://ftp.pride.ebi.ac.uk/pride-archive/2012/03/PXD000001/TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01.raw 1 1 TMT131 NT=LTQ Orbitrap Velos;AC=MS:1001742 NT=Oxidation;MT=Variable;TA=M;AC=Unimod:35 NT=Methylthio;TA=C;MT=fixed;AC=UNIMOD:39 NT=TMT6plex;TA=K;MT=Fixed;AC=UNIMOD:737 NT=TMT6plex;PP=Any N-term;MT=Fixed;AC=UNIMOD:737 AC=MS:1001313;NT=Trypsin NT=HCD;AC=PRIDE:0000590 not available not available SP=Yeast;CT=protein;AC=P00924;QY=10 SP=BOVIN;CT=protein;AC=P02769;QY=1 SP=RABIT;CT=protein;AC=P00489;QY=1 SP=BOVIN;CT=protein;AC=P62894;QY=2 diff --git 
a/assets/PXD026600.sdrf.tsv b/assets/PXD026600.sdrf.tsv deleted file mode 100644 index ed709b07..00000000 --- a/assets/PXD026600.sdrf.tsv +++ /dev/null @@ -1,5 +0,0 @@ -Source Name Characteristics[organism] Characteristics[organism part] Characteristics[age] Characteristics[strain] Characteristics[developmental stage] Characteristics[cell line] Characteristics[cell type] Characteristics[sex] Characteristics[disease] characteristics[mass] characteristics[spiked compound] Characteristics[biological replicate] Material Type assay name technology type comment[data file] comment[file uri] comment[technical replicate] comment[fraction identifier] comment[proteomics data acquisition method] comment[label] comment[instrument] comment[modification parameters] comment[modification parameters] comment[cleavage agent details] comment[dissociation method] comment[collision energy] comment[precursor mass tolerance] comment[fragment mass tolerance] factor value[spiked compound] -Sample 1 Escherichia coli K-12 whole plant not available K12 not available not applicable not available not available not available 1 ug CT=Mixture;CN=UPS1;QY=0.1 fmol 1 whole organism run 1 proteomic profiling by mass spectrometry RD139_Narrow_UPS1_0_1fmol_inj1.raw ftp://massive.ucsd.edu/MSV000087597/raw/RD139_Raw_files_DIA_Narrow/RD139_Narrow_UPS1_0_1fmol_inj1.raw 1 1 NT=Data-Independent Acquisition;AC=NCIT:C161786 AC=MS:1002038;NT=label free sample NT=Orbitrap Fusion;AC=MS:1002416 NT=Oxidation;MT=Variable;TA=M;AC=Unimod:35 NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 AC=MS:1001313;NT=Trypsin NT=HCD;AC=PRIDE:0000590 35% CE 10 ppm 20 mmu CT=Mixture;CN=UPS1;QY=0.1 fmol -Sample 1 Escherichia coli K-12 whole plant not available K12 not available not applicable not available not available not available 1 ug CT=Mixture;CN=UPS1;QY=0.1 fmol 1 whole organism run 2 proteomic profiling by mass spectrometry RD139_Narrow_UPS1_0_1fmol_inj2.raw 
ftp://massive.ucsd.edu/MSV000087597/raw/RD139_Raw_files_DIA_Narrow/RD139_Narrow_UPS1_0_1fmol_inj2.raw 2 1 NT=Data-Independent Acquisition;AC=NCIT:C161786 AC=MS:1002038;NT=label free sample NT=Orbitrap Fusion;AC=MS:1002416 NT=Oxidation;MT=Variable;TA=M;AC=Unimod:35 NT=Carbamidomethyl;TA=C;MT=fixed;AC=UNIMOD:4 AC=MS:1001313;NT=Trypsin NT=HCD;AC=PRIDE:0000590 35% CE 10 ppm 20 mmu CT=Mixture;CN=UPS1;QY=0.1 fmol -Sample 2 Escherichia coli K-12 whole plant not available K12 not available not applicable not available not available not available 1 ug CT=Mixture;CN=UPS1;QY=0.25 fmol 1 whole organism run 3 proteomic profiling by mass spectrometry RD139_Narrow_UPS1_0_25fmol_inj1.raw ftp://massive.ucsd.edu/MSV000087597/raw/RD139_Raw_files_DIA_Narrow/RD139_Narrow_UPS1_0_25fmol_inj1.raw 1 1 NT=Data-Independent Acquisition;AC=NCIT:C161786 AC=MS:1002038;NT=label free sample NT=Orbitrap Fusion;AC=MS:1002416 NT=Oxidation;MT=Variable;TA=M;AC=Unimod:35 NT=Carbamidomethyl;TA=C;MT=Fixed;AC=UNIMOD:4 AC=MS:1001313;NT=Trypsin NT=HCD;AC=PRIDE:0000590 35% CE 10 ppm 20 mmu CT=Mixture;CN=UPS1;QY=0.25 fmol -Sample 2 Escherichia coli K-12 whole plant not available K12 not available not applicable not available not available not available 1 ug CT=Mixture;CN=UPS1;QY=0.25 fmol 1 whole organism run 4 proteomic profiling by mass spectrometry RD139_Narrow_UPS1_0_25fmol_inj2.raw ftp://massive.ucsd.edu/MSV000087597/raw/RD139_Raw_files_DIA_Narrow/RD139_Narrow_UPS1_0_25fmol_inj2.raw 2 1 NT=Data-Independent Acquisition;AC=NCIT:C161786 AC=MS:1002038;NT=label free sample NT=Orbitrap Fusion;AC=MS:1002416 NT=Oxidation;MT=Variable;TA=M;AC=Unimod:35 NT=Carbamidomethyl;TA=C;MT=Fixed;AC=UNIMOD:4 AC=MS:1001313;NT=Trypsin NT=HCD;AC=PRIDE:0000590 35% CE 10 ppm 20 mmu CT=Mixture;CN=UPS1;QY=0.25 fmol diff --git a/bin/msstats_plfq.R b/bin/msstats_plfq.R index 7689e745..0253b22e 100755 --- a/bin/msstats_plfq.R +++ b/bin/msstats_plfq.R @@ -1,67 +1,100 @@ #!/usr/bin/env Rscript -args = commandArgs(trailingOnly=TRUE) 
-char_to_boolean = c("true"=TRUE, "false"=FALSE) -usage <- "Rscript msstats_plfq.R input.csv [list of contrasts or 'pairwise'] [default control condition or ''] ..." - -#TODO rewrite mzTab in next version -if (length(args)<1) { - print(usage) - stop("At least the first argument must be supplied (input csv).n", call.=FALSE) -} -if (length(args)<2) { - # contrasts - args[2] = "pairwise" -} -if (length(args)<3) { - # default control condition - args[3] = "" -} -if (length(args)<4) { - # removeOneFeatProts - args[4] = FALSE -} -removeOneFeatProts = args[4] -if(typeof(removeOneFeatProts) == 'character'){ - removeOneFeatProts = char_to_boolean[removeOneFeatProts] -} - -if (length(args)<5) { - # keeps features with only one or two measurements across runs - args[5] = TRUE -} -removeFewMeasurements = args[5] -if(typeof(removeFewMeasurements) == 'character'){ - removeFewMeasurements = char_to_boolean[removeFewMeasurements] -} - -if (length(args)<6) { - # which features to use for quantification per protein: 'top3' or 'highQuality' which removes outliers only" - args[6] = 'top3' -} -if (length(args)<7) { - # which summary method to use: 'TMP' (Tukey's median polish) or 'linear' (linear mixed model) - args[7] = 'TMP' -} -if (length(args)<8) { - # outputPrefix - args[8] = './msstats' -} - -csv_input <- args[1] -contrast_str <- args[2] -control_str <- args[3] # load the MSstats library require(MSstats) require(tibble) require(data.table) -# helper functions -make_contrasts <- function(contrasts, levels) -{ +# TODO: Functions shared between msstats_plfq and msstats_tmt should be merged into msstats_utils.R +# Please keep the functions synchronized between the three scripts until the code can be merged.
+ +### Beginning Functions section + +#' Initialize the TMT and LFQ parameters +#' +#' @param usage message to exit the script analysis +#' +#' @return +initialize_msstats <- function(usage) { + args <- commandArgs(trailingOnly = TRUE) + if (length(args) < 1) { + print(usage) + stop("At least the first argument must be supplied (input csv).n", call. = FALSE) + } + if (length(args) < 2) { + args[2] <- "pairwise" + } + + if (length(args) < 3) { + # default control condition + args[3] <- "" + } + + if (length(args) < 4) { + # removeOneFeatProts + args[4] <- FALSE + } + return(args) +} + +#' Handle the number of contrasts in the differential expression analysis. +#' It returns a matrix of the contrasts to be analyzed. +#' +#' @param l +#' @param contrast_str +#' @param lvls names of the condition levels +#' +#' @return +#' +parse_contrasts <- function(l, contrast_str, lvls) { + if (contrast_str == "pairwise") { + if (control_str == "") { + contrast_mat <- matrix(nrow = l * (l - 1) / 2, ncol = l, dimnames = list(Contrasts = rep(NA, l * (l - 1) / 2), Levels = lvls)) + c <- 1 + for (i in 1:(l - 1)) { + for (j in (i + 1):l) { + comparison <- rep(0, l) + comparison[i] <- 1 + comparison[j] <- -1 + contrast_mat[c,] <- comparison + rownames(contrast_mat)[c] <- paste0(lvls[i], "-", lvls[j]) + c <- c + 1 + } + } + } else { + control <- which(as.character(lvls) == control_str) + if (length(control) == 0) { + stop("Control condition not part of found levels.n", call.
= FALSE) + } + contrast_mat <- matrix(nrow = l - 1, ncol = l, dimnames = list(Contrasts = rep(NA, l - 1), Levels = lvls)) + c <- 1 + for (j in setdiff(1:l, control)) { + comparison <- rep(0, l) + comparison[i] <- -1 + comparison[j] <- 1 + contrast_mat[c,] <- comparison + rownames(contrast_mat)[c] <- paste0(lvls[i], "-", lvls[j]) + c <- c + 1 + } + } + } else { + contrast_lst <- unlist(strsplit(contrast_str, ";")) + contrast_mat <- make_contrasts(contrast_lst, lvls) + } + print("Contrasts to be tested:") + print(contrast_mat) + return(contrast_mat) +} + +#' This function helps to define the contrasts that will be compared. +#' +#' @param contrasts +#' @param levels +#' +#' @return +make_contrasts <- function(contrasts, levels) { #helper function - indicatorRow <- function(pos,len) - { + indicatorRow <- function(pos,len){ row <- rep(0,len) row[pos] <- 1 return(row) @@ -71,32 +104,88 @@ make_contrasts <- function(contrasts, levels) if (!is.character(levels)) levels <- colnames(levels) l <- length(levels) - if (l < 1) - { + if (l < 1){ stop("No levels given") } ncontr <- length(contrasts) - if (ncontr < 1) - { + if (ncontr < 1){ stop("No contrasts given") } levelsenv <- new.env() - for (i in 1:l) - { + for (i in 1:l) { assign(levels[i], indicatorRow(i,l), pos=levelsenv) } contrastmat <- matrix(0, l, ncontr, dimnames=list(Levels=levels,Contrasts=contrasts)) - for (j in 1:ncontr) - { + for (j in 1:ncontr) { contrastsj <- parse(text=contrasts[j]) contrastmat[,j] <- eval(contrastsj, envir=levelsenv) } return(t(contrastmat)) } +#' Get missing samples by condition +#' +#' @param processedData +#' +#' @return +get_missing_in_condition <- function(processedData) { + p <- processedData + n_samples <- aggregate(p$SUBJECT, by = list(p$GROUP), FUN = function(x) {return(length(unique(as.numeric(x))))}) + colnames(n_samples) <- c("GROUP", "n_samples") + p <- p[complete.cases(p["LogIntensities"]),][,c("Protein", "GROUP", "SUBJECT")] + p_dup <- p[!duplicated(p),] + p_dup_agg <-
aggregate(p_dup$SUBJECT, by = list(p_dup$Protein, p_dup$GROUP), length) + colnames(p_dup_agg) <- c("Protein", "GROUP", "non_na") + agg_join <- merge(p_dup_agg, n_samples, by = "GROUP") + agg_join$missingInCondition <- 1 - agg_join$non_na / agg_join$n_samples + + p <- dcast(setDT(agg_join), Protein~GROUP, value.var = "missingInCondition") + return(p) + } + +### End Function Sections + +char_to_boolean <- c("true"=TRUE, "false"=FALSE) +usage <- "Rscript msstats_plfq.R input.csv [list of contrasts or 'pairwise'] [default control condition or ''] ..." + +#TODO rewrite mzTab in next version +args <- initialize_msstats(usage = usage) + +removeOneFeatProts <- args[4] +if(typeof(removeOneFeatProts) == 'character'){ + removeOneFeatProts <- char_to_boolean[removeOneFeatProts] +} + +if (length(args)<5) { + # keeps features with only one or two measurements across runs + args[5] <- TRUE +} +removeFewMeasurements <- args[5] + +if(typeof(removeFewMeasurements) == 'character'){ + removeFewMeasurements <- char_to_boolean[removeFewMeasurements] +} + +if (length(args)<6) { + # which features to use for quantification per protein: 'top3' or 'highQuality' which removes outliers only" + args[6] <- 'top3' +} +if (length(args)<7) { + # which summary method to use: 'TMP' (Tukey's median polish) or 'linear' (linear mixed model) + args[7] <- 'TMP' +} +if (length(args)<8) { + # outputPrefix + args[8] <- './msstats' +} + +csv_input <- args[1] +contrast_str <- args[2] +control_str <- args[3] + # read dataframe into MSstats data <- read.csv(csv_input) quant <- OpenMStoMSstatsFormat(data, removeProtein_with1Feature = removeOneFeatProts, removeFewMeasurements=removeFewMeasurements) @@ -106,127 +195,32 @@ processed.quant <- dataProcess(quant, censoredInt = 'NA', featureSubset = args[6 lvls <- levels(as.factor(data$Condition)) l <- length(lvls) -if (l == 1) -{ + +if (l == 1) { print("Only one condition found. No contrasts to be tested. 
If this is not the case, please check your experimental design.") } else { - if (contrast_str == "pairwise") - { - if (control_str == "") - { - contrast_mat <- matrix(nrow = l * (l-1) / 2, ncol = l, dimnames=list(Contrasts=rep(NA, l * (l-1) / 2), Levels=lvls)) - c <- 1 - for (i in 1:(l-1)) - { - for (j in (i+1):l) - { - comparison <- rep(0,l) - comparison[i] <- 1 - comparison[j] <- -1 - contrast_mat[c,] <- comparison - rownames(contrast_mat)[c] <- paste0(lvls[i],"-",lvls[j]) - c <- c+1 - } - } - } else { - control <- which(as.character(lvls) == control_str) - if (length(control) == 0) - { - stop("Control condition not part of found levels.n", call.=FALSE) - } - - contrast_mat <- matrix(nrow = l-1, ncol = l, dimnames=list(Contrasts=rep(NA, l-1),Levels=lvls)) - c <- 1 - for (j in setdiff(1:l,control)) - { - comparison <- rep(0,l) - comparison[i] <- -1 - comparison[j] <- 1 - contrast_mat[c,] <- comparison - rownames(contrast_mat)[c] <- paste0(lvls[i],"-",lvls[j]) - c <- c+1 - } - } - } else { - contrast_lst <- unlist(strsplit(contrast_str,";")) - contrast_mat <- make_contrasts(contrast_lst, lvls) - } - + contrast_mat <- parse_contrasts(l = l, contrast_str = contrast_str, lvls = lvls) print ("Contrasts to be tested:") print (contrast_mat) - #TODO allow for user specified contrasts test.MSstats <- groupComparison(contrast.matrix=contrast_mat, data=processed.quant) - #for (comp in rownames(contrast_mat)) - #{ - # groupComparisonPlots(data=test.MSstats$ComparisonResult, type="ComparisonPlot", - # width=12, height=12,dot.size = 2, sig=1)#, - # which.Comparison = comp, - # address=F) - # # try to plot all comparisons - #} - - - # annotate how often the protein was quantified in each condition (NA values introduced by merge of completely missing are set to 1.0) - ############ also calculate missingness on condition level - - # input: ProcessedData matrix of MSstats - # output: - # calculate fraction of na in condition (per protein) - # Groups: PROTEIN [762] - # PROTEIN `1` 
`2` - # - # 1 sp|A1ANS1|HTPG_PELPD 0 0.5 - # 2 sp|A2I7N3|SPA37_BOVIN 0 0.5 - # 3 sp|A2VDF0|FUCM_HUMAN 0 0.5 - # 4 sp|A6ND91|ASPD_HUMAN 0.5 0.5 - # 5 sp|A7E3W2|LG3BP_BOVIN 0.5 0.5 - # 6 sp|B8FGT4|ATPB_DESAA 0 0.5 - - getMissingInCondition <- function(processedData) { - p <- processedData - n_samples <- aggregate(p$SUBJECT, by = list(p$GROUP), FUN = function(x) {return(length(unique(as.numeric(x))))}) - colnames(n_samples) <- c("GROUP", "n_samples") - p <- p[complete.cases(p["LogIntensities"]),][,c("Protein", "GROUP", "SUBJECT")] - p_dup <- p[!duplicated(p),] - p_dup_agg <- aggregate(p_dup$SUBJECT, by = list(p_dup$Protein, p_dup$GROUP), length) - colnames(p_dup_agg) <- c("Protein", "GROUP", "non_na") - agg_join <- merge(p_dup_agg, n_samples, by = "GROUP") - agg_join$missingInCondition = 1 - agg_join$non_na / agg_join$n_samples - - p <- dcast(setDT(agg_join), Protein~GROUP, value.var = "missingInCondition") - return(p) - } - - mic <- getMissingInCondition(processed.quant$ProteinLevelData) + mic <- get_missing_in_condition(processed.quant$ProteinLevelData) test.MSstats$ComparisonResult <- merge(x=test.MSstats$ComparisonResult, y=mic, by="Protein") commoncols <- intersect(colnames(mic), colnames(test.MSstats$ComparisonResult)) test.MSstats$ComparisonResult[, commoncols] <- apply(test.MSstats$Comparison[, commoncols], 2, function(x) {x[is.na(x)] <- 1; return(x)}) - #write comparison to CSV (one CSV per contrast) - # writeComparisonToCSV <- function(DF) - # { - # write.table(DF, file=paste0("comparison_",unique(DF$Label),".csv"), quote=FALSE, sep='\t', row.names = FALSE) - # return(DF) - # } - # ComparisonResultSplit <- split(test.MSstats$ComparisonResult, test.MSstats$ComparisonResult$Label) - # for(i in 1:length(ComparisonResultSplit)){ - # writeComparisonToCSV(ComparisonResultSplit[[i]]) - # } - #write all comparisons into one CSV file write.table(test.MSstats$ComparisonResult, file=paste0(args[8],"_comparisons.csv"), quote=FALSE, sep='\t', row.names = FALSE) 
groupComparisonPlots(data=test.MSstats$ComparisonResult, type="ComparisonPlot", width=12, height=12,dot.size = 2) - test.MSstats$Volcano = test.MSstats$ComparisonResult[!is.na(test.MSstats$ComparisonResult$pvalue),] + test.MSstats$Volcano <- test.MSstats$ComparisonResult[!is.na(test.MSstats$ComparisonResult$pvalue),] groupComparisonPlots(data=test.MSstats$Volcano, type="VolcanoPlot", width=12, height=12,dot.size = 2) # Otherwise it fails since the behaviour is undefined - if (nrow(contrast_mat) > 1) - { + if (nrow(contrast_mat) > 1) { groupComparisonPlots(data=test.MSstats$ComparisonResult, type="Heatmap", width=12, height=12,dot.size = 2) } diff --git a/bin/msstats_tmt.R b/bin/msstats_tmt.R index e6d578e7..08e80114 100755 --- a/bin/msstats_tmt.R +++ b/bin/msstats_tmt.R @@ -1,91 +1,219 @@ #!/usr/bin/env Rscript -args = commandArgs(trailingOnly=TRUE) -char_to_boolean = c("true"=TRUE, "false"=FALSE) +require(MSstatsTMT) +# TODO: Functions shared between msstats_plfq and msstats_tmt should be merged into msstats_utils.R +# Please keep the functions synchronized between the three scripts until the code can be merged. + +### Beginning Functions section + +#' Initialize the TMT and LFQ parameters +#' +#' @param usage message to exit the script analysis +#' +#' @return +initialize_msstats <- function(usage) { + args <- commandArgs(trailingOnly = TRUE) + if (length(args) < 1) { + print(usage) + stop("At least the first argument must be supplied (input csv).n", call. = FALSE) + } + if (length(args) < 2) { + args[2] <- "pairwise" + } + + if (length(args) < 3) { + # default control condition + args[3] <- "" + } + + if (length(args) < 4) { + # removeOneFeatProts + args[4] <- FALSE + } + return(args) +} + +#' Handle the number of contrasts in the differential expression analysis. +#' It returns a matrix of the contrasts to be analyzed.
+#' +#' @param l +#' @param contrast_str +#' @param lvls names of the condition levels +#' +#' @return +#' +parse_contrasts <- function(l, contrast_str, lvls) { + if (contrast_str == "pairwise") { + if (control_str == "") { + contrast_mat <- matrix(nrow = l * (l - 1) / 2, ncol = l, dimnames = list(Contrasts = rep(NA, l * (l - 1) / 2), Levels = lvls)) + c <- 1 + for (i in 1:(l - 1)) { + for (j in (i + 1):l) { + comparison <- rep(0, l) + comparison[i] <- 1 + comparison[j] <- -1 + contrast_mat[c,] <- comparison + rownames(contrast_mat)[c] <- paste0(lvls[i], "-", lvls[j]) + c <- c + 1 + } + } + } else { + control <- which(as.character(lvls) == control_str) + if (length(control) == 0) { + stop("Control condition not part of found levels.n", call. = FALSE) + } + contrast_mat <- matrix(nrow = l - 1, ncol = l, dimnames = list(Contrasts = rep(NA, l - 1), Levels = lvls)) + c <- 1 + for (j in setdiff(1:l, control)) { + comparison <- rep(0, l) + comparison[i] <- -1 + comparison[j] <- 1 + contrast_mat[c,] <- comparison + rownames(contrast_mat)[c] <- paste0(lvls[i], "-", lvls[j]) + c <- c + 1 + } + } + } else { + contrast_lst <- unlist(strsplit(contrast_str, ";")) + contrast_mat <- make_contrasts(contrast_lst, lvls) + } + print("Contrasts to be tested:") + print(contrast_mat) + return(contrast_mat) +} + +#' This function helps to define the contrasts that will be compared.
+#' +#' @param contrasts +#' @param levels +#' +#' @return +make_contrasts <- function(contrasts, levels) { + #helper function + indicatorRow <- function(pos,len){ + row <- rep(0,len) + row[pos] <- 1 + return(row) + } + + if (is.factor(levels)) levels <- levels(levels) + if (!is.character(levels)) levels <- colnames(levels) + + l <- length(levels) + if (l < 1){ + stop("No levels given") + } + + ncontr <- length(contrasts) + if (ncontr < 1){ + stop("No contrasts given") + } + + levelsenv <- new.env() + for (i in 1:l) { + assign(levels[i], indicatorRow(i,l), pos=levelsenv) + } + + contrastmat <- matrix(0, l, ncontr, dimnames=list(Levels=levels,Contrasts=contrasts)) + for (j in 1:ncontr) { + contrastsj <- parse(text=contrasts[j]) + contrastmat[,j] <- eval(contrastsj, envir=levelsenv) + } + return(t(contrastmat)) +} + +#' Get missing samples by condition +#' +#' @param processedData +#' +#' @return +get_missing_in_condition <- function(processedData) { + p <- processedData + n_samples <- aggregate(p$SUBJECT, by = list(p$GROUP), FUN = function(x) {return(length(unique(as.numeric(x))))}) + colnames(n_samples) <- c("GROUP", "n_samples") + p <- p[complete.cases(p["LogIntensities"]),][,c("Protein", "GROUP", "SUBJECT")] + p_dup <- p[!duplicated(p),] + p_dup_agg <- aggregate(p_dup$SUBJECT, by = list(p_dup$Protein, p_dup$GROUP), length) + colnames(p_dup_agg) <- c("Protein", "GROUP", "non_na") + agg_join <- merge(p_dup_agg, n_samples, by = "GROUP") + agg_join$missingInCondition <- 1 - agg_join$non_na / agg_join$n_samples + + p <- dcast(setDT(agg_join), Protein~GROUP, value.var = "missingInCondition") + return(p) + } + +### End Function Sections + +char_to_boolean <- c("true"=TRUE, "false"=FALSE) usage <- "Rscript msstats_tmt.R input.csv [list of contrasts or 'pairwise'] [default control condition or '']... 
[normalization based reference channel]" -if (length(args)<1) { - print(usage) - stop("At least the first argument must be supplied (input csv).n", call.=FALSE) -} -if (length(args)<2) { - # contrasts - args[2] = "pairwise" -} -if (length(args)<3) { - # default control condition - args[3] = "" -} -if (length(args)<4) { - # removeOneFeatProts - args[4] = FALSE -} -rmProtein_with1Feature = args[4] +args <- initialize_msstats(usage = usage) + +rmProtein_with1Feature <- args[4] if(typeof(rmProtein_with1Feature) == 'character'){ - rmProtein_with1Feature = char_to_boolean[rmProtein_with1Feature] + rmProtein_with1Feature <- char_to_boolean[rmProtein_with1Feature] } if (length(args)<5) { # use unique peptide - args[5] = TRUE + args[5] <- TRUE } -useUniquePeptide = args[5] +useUniquePeptide <- args[5] if(typeof(useUniquePeptide) == 'character'){ - useUniquePeptide = char_to_boolean[useUniquePeptide] + useUniquePeptide <- char_to_boolean[useUniquePeptide] } if (length(args)<6) { # remove the features that have 1 or 2 measurements within each Run. - args[6] = TRUE + args[6] <- TRUE } -rmPSM_withfewMea_withinRun = args[6] +rmPSM_withfewMea_withinRun <- args[6] if(typeof(rmPSM_withfewMea_withinRun) == 'character'){ - rmPSM_withfewMea_withinRun = char_to_boolean[rmPSM_withfewMea_withinRun] + rmPSM_withfewMea_withinRun <- char_to_boolean[rmPSM_withfewMea_withinRun] } if (length(args)<7) { # sum or max - when there are multiple measurements for certain feature in certain Run. 
- args[7] = 'sum' + args[7] <- 'sum' } if (length(args)<8) { # summarization methods to protein-level can be performed: "msstats(default)" - args[8] = "msstats" + args[8] <- "msstats" } if (length(args)<9) { # Global median normalization on peptide level data - args[9] = TRUE + args[9] <- TRUE } -global_norm = args[9] +global_norm <- args[9] if(typeof(global_norm) == 'character'){ - global_norm = char_to_boolean[global_norm] + global_norm <- char_to_boolean[global_norm] } if (length(args)<10) { # Remove norm channel - args[10] = TRUE + args[10] <- TRUE } -remove_norm_channel = args[10] +remove_norm_channel <- args[10] if(typeof(remove_norm_channel) == 'character'){ - remove_norm_channel = char_to_boolean[remove_norm_channel] + remove_norm_channel <- char_to_boolean[remove_norm_channel] } if (length(args)<11) { # default Reference channel based normalization between MS runs on protein level data. # Reference Channel annotated by 'Norm' in Condition. - args[11] = TRUE + args[11] <- TRUE } -reference_norm = args[11] +reference_norm <- args[11] if(typeof(reference_norm) == 'character'){ - reference_norm = char_to_boolean[reference_norm] + reference_norm <- char_to_boolean[reference_norm] } csv_input <- args[1] contrast_str <- args[2] control_str <- args[3] -require(MSstatsTMT) # read dataframe into MSstatsTMT data <- read.csv(csv_input) quant <- OpenMStoMSstatsTMTFormat(data, useUniquePeptide=useUniquePeptide, rmPSM_withfewMea_withinRun=rmPSM_withfewMea_withinRun, @@ -102,51 +230,11 @@ dataProcessPlotsTMT(processed.quant, "QCPlot", width=12, height=12, which.Protei lvls <- levels(as.factor(processed.quant$ProteinLevelData$Condition)) l <- length(lvls) -if (l == 1) -{ + +if (l == 1) { print("Only one condition found. No contrasts to be tested. 
If this is not the case, please check your experimental design.") } else { - if (contrast_str == "pairwise") - { - if (control_str == "") - { - contrast_mat <- matrix(nrow = l * (l-1) / 2, ncol = l, dimnames=list(Contrasts=rep(NA, l * (l-1) / 2), Levels=lvls)) - c <- 1 - for (i in 1:(l-1)) - { - for (j in (i+1):l) - { - comparison <- rep(0,l) - comparison[i] <- 1 - comparison[j] <- -1 - contrast_mat[c,] <- comparison - rownames(contrast_mat)[c] <- paste0(lvls[i],"-",lvls[j]) - c <- c+1 - } - } - } else { - control <- which(as.character(lvls) == control_str) - if (length(control) == 0) - { - stop("Control condition not part of found levels.n", call.=FALSE) - } - - contrast_mat <- matrix(nrow = l-1, ncol = l, dimnames=list(Contrasts=rep(NA, l-1),Levels=lvls)) - c <- 1 - for (j in setdiff(1:l,control)) - { - comparison <- rep(0,l) - comparison[i] <- -1 - comparison[j] <- 1 - contrast_mat[c,] <- comparison - rownames(contrast_mat)[c] <- paste0(lvls[i],"-",lvls[j]) - c <- c+1 - } - } - } else { - contrast_lst <- unlist(strsplit(contrast_str,";")) - contrast_mat <- make_contrasts(contrast_lst, lvls) - } + contrast_mat <- parse_contrasts(l = l, contrast_str = contrast_str, lvls = lvls) print ("Contrasts to be tested:") print (contrast_mat) #TODO allow for user specified contrasts @@ -154,15 +242,4 @@ if (l == 1) #TODO allow manual input (e.g. 
proteins of interest) write.table(test.MSstatsTMT$ComparisonResult, file=paste0("msstatsiso_results.csv"), quote=FALSE, sep='\t', row.names = FALSE) - - #write comparison to CSV (one CSV per contrast) - # writeComparisonToCSV <- function(DF) - # { - # write.table(DF, file=paste0("comparison_",unique(DF$Label),".csv"), quote=FALSE, sep='\t', row.names = FALSE) - # return(DF) - # } - # ComparisonResultSplit <- split(test.MSstatsTMT$ComparisonResult, test.MSstatsTMT$ComparisonResult$Label) - # for(i in 1:length(ComparisonResultSplit)){ - # writeComparisonToCSV(ComparisonResultSplit[[i]]) - # } } diff --git a/bin/msstats_utils.R b/bin/msstats_utils.R new file mode 100644 index 00000000..9106d4b6 --- /dev/null +++ b/bin/msstats_utils.R @@ -0,0 +1,147 @@ +### Begining Functions section + +#' Inizialize the TMT and LFQ parameters +#' +#' @param usage message to exit the script analysis +#' +#' @return +initialize_msstats <- function(usage) { + args <- commandArgs(trailingOnly = TRUE) + if (length(args) < 1) { + print(usage) + stop("At least the first argument must be supplied (input csv).n", call. = FALSE) + } + if (length(args) < 2) { + args[2] <- "pairwise" + } + + if (length(args) < 3) { + # default control condition + args[3] <- "" + } + + if (length(args) < 4) { + # removeOneFeatProts + args[4] <- FALSE + } + return(args) +} + +#' Handle the number of contrasts in the differential expression analysis. +#' It returns a matrix of the contrasts to be analyzed. 
+#' +#' @param l +#' @param contrast_str +#' @param lvls number of doncitions +#' +#' @return +#' +parse_contrasts <- function(l, contrast_str, lvls) { + if (contrast_str == "pairwise") { + if (control_str == "") { + contrast_mat <- matrix(nrow = l * (l - 1) / 2, ncol = l, dimnames = list(Contrasts = rep(NA, l * (l - 1) / 2), Levels = lvls)) + c <- 1 + for (i in 1:(l - 1)) { + for (j in (i + 1):l) { + comparison <- rep(0, l) + comparison[i] <- 1 + comparison[j] <- -1 + contrast_mat[c,] <- comparison + rownames(contrast_mat)[c] <- paste0(lvls[i], "-", lvls[j]) + c <- c + 1 + } + } + } else { + control <- which(as.character(lvls) == control_str) + if (length(control) == 0) { + stop("Control condition not part of found levels.n", call. = FALSE) + } + contrast_mat <- matrix(nrow = l - 1, ncol = l, dimnames = list(Contrasts = rep(NA, l - 1), Levels = lvls)) + c <- 1 + for (j in setdiff(1:l, control)) { + comparison <- rep(0, l) + comparison[i] <- -1 + comparison[j] <- 1 + contrast_mat[c,] <- comparison + rownames(contrast_mat)[c] <- paste0(lvls[i], "-", lvls[j]) + c <- c + 1 + } + } + } else { + contrast_lst <- unlist(strsplit(contrast_str, ";")) + contrast_mat <- make_contrasts(contrast_lst, lvls) + } + print("Contrasts to be tested:") + print(contrast_mat) + return(contrast_mat) +} + +#' This functions hels to define the contrasts that will be compare. 
+#' +#' @param contrasts +#' @param levels +#' +#' @return +make_contrasts <- function(contrasts, levels) { + #helper function + indicatorRow <- function(pos,len){ + row <- rep(0,len) + row[pos] <- 1 + return(row) + } + + if (is.factor(levels)) levels <- levels(levels) + if (!is.character(levels)) levels <- colnames(levels) + + l <- length(levels) + if (l < 1){ + stop("No levels given") + } + + ncontr <- length(contrasts) + if (ncontr < 1){ + stop("No contrasts given") + } + + levelsenv <- new.env() + for (i in 1:l) { + assign(levels[i], indicatorRow(i,l), pos=levelsenv) + } + + contrastmat <- matrix(0, l, ncontr, dimnames=list(Levels=levels,Contrasts=contrasts)) + for (j in 1:ncontr) { + contrastsj <- parse(text=contrasts[j]) + contrastmat[,j] <- eval(contrastsj, envir=levelsenv) + } + return(t(contrastmat)) +} + +#' Get missing samples by condition +#' +#' @param processedData +#' +#' @return +get_missing_in_condition <- function(processedData) { + p <- processedData + n_samples <- aggregate(p$SUBJECT, by = list(p$GROUP), FUN = function(x) {return(length(unique(as.numeric(x))))}) + colnames(n_samples) <- c("GROUP", "n_samples") + p <- p[complete.cases(p["LogIntensities"]),][,c("Protein", "GROUP", "SUBJECT")] + p_dup <- p[!duplicated(p),] + p_dup_agg <- aggregate(p_dup$SUBJECT, by = list(p_dup$Protein, p_dup$GROUP), length) + colnames(p_dup_agg) <- c("Protein", "GROUP", "non_na") + agg_join <- merge(p_dup_agg, n_samples, by = "GROUP") + agg_join$missingInCondition <- 1 - agg_join$non_na / agg_join$n_samples + + p <- dcast(setDT(agg_join), Protein~GROUP, value.var = "missingInCondition") + return(p) + } + +### End Function Sections + + + + + + + + diff --git a/conf/test.config b/conf/test.config deleted file mode 100644 index bb1d959d..00000000 --- a/conf/test.config +++ /dev/null @@ -1,36 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for running minimal tests (ISO) 
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Defines input files and everything required to run a fast and simple pipeline test. - - Use as follows: - nextflow run nf-core/quantms -profile test, [--outdir ] - -------------------------------------------------------------------------------------------- -*/ - -params { - config_profile_name = 'Test profile DDA ISO' - config_profile_description = 'Minimal test dataset to check pipeline function of the isotopic labelling branch of the pipeline' - - // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = '6.GB' - max_time = '6.h' - - outdir = "./results_iso" - tracedir = "${params.outdir}/pipeline_info" - - // Input data - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/quantms/testdata/tmt_ci/PXD000001.sdrf.tsv' - - database = 'https://raw.githubusercontent.com/daichengxin/proteomicstmt/dev/tmt_testdata/erwinia_carotovora.fasta' - posterior_probabilities = "percolator" - search_engines = "msgf" - protein_level_fdr_cutoff = 0.01 - decoy_string = "rev" - add_decoys = true - variable_mods = 'Oxidation (M)' - fixed_mods = 'Methylthio (C)' -} diff --git a/conf/test_dia.config b/conf/test_dia.config index 8ddcc6de..bd5eac56 100644 --- a/conf/test_dia.config +++ b/conf/test_dia.config @@ -23,8 +23,8 @@ params { tracedir = "${params.outdir}/pipeline_info" // Input data - input = 'https://raw.githubusercontent.com/daichengxin/quantms/dev/assets/PXD026600.sdrf.tsv' - database = 'ftp://massive.ucsd.edu/MSV000087597/sequence/REF_EColi_K12_UPS1_combined.fasta' + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/quantms/testdata/dia_ci/PXD026600.sdrf.tsv' + database = 'https://raw.githubusercontent.com/nf-core/test-datasets/quantms/testdata/dia_ci/REF_EColi_K12_UPS1_combined.fasta' min_pr_mz = 350 max_pr_mz = 950 min_fr_mz = 500 diff --git a/conf/test_full.config b/conf/test_tmt.config similarity index 81% rename from 
conf/test_full.config rename to conf/test_tmt.config index 1a2e1f1b..c49e8718 100644 --- a/conf/test_full.config +++ b/conf/test_tmt.config @@ -5,7 +5,7 @@ Defines input files and everything required to run a full size pipeline test. Use as follows: - nextflow run nf-core/quantms -profile test_full, [--outdir ] + nextflow run nf-core/quantms -profile test_tmt, [--outdir ] ---------------------------------------------------------------------------------------- */ @@ -17,11 +17,15 @@ params { outdir = "./results_iso_full" tracedir = "${params.outdir}/pipeline_info" + max_cpus = 2 + max_memory = 6.GB + max_time = 48.h + // Input data for full size test input = 'https://raw.githubusercontent.com/nf-core/test-datasets/quantms/testdata/tmt_ci/PXD000001.sdrf.tsv' quant_method = 'ISO' - database = 'https://raw.githubusercontent.com/daichengxin/proteomicstmt/dev/tmt_testdata/erwinia_carotovora.fasta' + database = 'https://raw.githubusercontent.com/nf-core/test-datasets/quantms/testdata/tmt_ci/erwinia_carotovora.fasta' posterior_probabilities = "percolator" search_engines = "comet,msgf" protein_level_fdr_cutoff = 0.01 diff --git a/nextflow.config b/nextflow.config index 4eb6c34d..e9277f61 100644 --- a/nextflow.config +++ b/nextflow.config @@ -281,12 +281,11 @@ profiles { podman.enabled = false shifter.enabled = false } - test { includeConfig 'conf/test.config' } - test_localize { includeConfig 'conf/test_localize.config'} - test_full { includeConfig 'conf/test_full.config' } - test_lfq { includeConfig 'conf/test_lfq.config' } - test_dia { includeConfig 'conf/test_dia.config' } - mambaci { includeConfig 'conf/mambaci.config' } + test_localize { includeConfig 'conf/test_localize.config' } + test_tmt { includeConfig 'conf/test_tmt.config' } + test_lfq { includeConfig 'conf/test_lfq.config' } + test_dia { includeConfig 'conf/test_dia.config' } + mambaci { includeConfig 'conf/mambaci.config' } } // Load module config after profile, so they can depend on overwritten input 
parameters specific for each profile.