diff --git a/CHANGELOG.md b/CHANGELOG.md index 7e27813f10..92eeae215a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -38,6 +38,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#613](https://github.com/nf-core/sarek/pull/613) - Added params `--dbnsfp_consequence` to allow configuration of consequence for the `dbnsfp` `VEP` plugin - [#613](https://github.com/nf-core/sarek/pull/613) - Added params `--vep_version` to allow more configuration on the vep container definition - [#620](https://github.com/nf-core/sarek/pull/620) - Added checks for sex information when running a CNV tools +- [#623](https://github.com/nf-core/sarek/pull/623) - Additional checks of data in the input sample sheet. ### Changed diff --git a/README.md b/README.md index d93929f39d..783cfe59fa 100644 --- a/README.md +++ b/README.md @@ -100,6 +100,7 @@ We thank the following people for their extensive assistance in the development - [Jesper Eisfeldt](https://github.com/J35P312) - [Johannes Alneberg](https://github.com/alneberg) - [José Fernández Navarro](https://github.com/jfnavarro) +- [Lasse Westergaard Folkersen](https://github.com/lassefolkersen) - [Lucia Conde](https://github.com/lconde-ucl) - [Malin Larsson](https://github.com/malinlarsson) - [Marcel Martin](https://github.com/marcelm) diff --git a/workflows/sarek.nf b/workflows/sarek.nf index 61e6ed5c7c..ac7ed49496 100644 --- a/workflows/sarek.nf +++ b/workflows/sarek.nf @@ -1005,6 +1005,35 @@ def extract_csv(csv_file) { } } + // Additional check of sample sheet: + // 1. If params.step == "mapping", then each row should specify a lane and the same combination of patient, sample and lane shouldn't be present in different rows. + // 2. The same sample shouldn't be listed for different patients. 
+    def patient_sample_lane_combinations_in_samplesheet = [] // [patient, sample, lane] triples seen so far
+    def sample2patient = [:] // sample name -> patient name from the first row that listed that sample
+    // Validation pass over the sample sheet: the mapped result is not used; an invalid row logs an error and calls System.exit(1).
+    Channel.from(csv_file).splitCsv(header: true)
+        .map{ row ->
+            if (params.step == "mapping") {
+                if ( !row.lane ) {  // This also handles the case where the lane is left as an empty string
+                    log.error('The sample sheet should specify a lane for patient "' + row.patient.toString() + '" and sample "' + row.sample.toString() + '".')
+                    System.exit(1)
+                }
+                def patient_sample_lane = [row.patient.toString(), row.sample.toString(), row.lane.toString()]
+                if (patient_sample_lane in patient_sample_lane_combinations_in_samplesheet) { // list membership compares the triples by value
+                    log.error('The patient-sample-lane combination "' + row.patient.toString() + '", "' + row.sample.toString() + '", and "' + row.lane.toString() + '" is present multiple times in the sample sheet.')
+                    System.exit(1)
+                } else {
+                    patient_sample_lane_combinations_in_samplesheet.add(patient_sample_lane)
+                }
+            }
+            if (!sample2patient.containsKey(row.sample.toString())) {
+                sample2patient[row.sample.toString()] = row.patient.toString()
+            } else if (sample2patient[row.sample.toString()] != row.patient.toString()) { // compare by value: Groovy's `!==` tests object identity and would reject equal patient names read from different rows
+                log.error('The sample "' + row.sample.toString() + '" is registered for both patient "' + row.patient.toString() + '" and "' + sample2patient[row.sample.toString()] + '" in the sample sheet.')
+                System.exit(1)
+            }
+        }
+
     Channel.from(csv_file).splitCsv(header: true)
         //Retrieves number of lanes by grouping together by patient and sample and counting how many entries there are for this combination
         .map{ row ->