From 7b1833d35db32efebe29d2575750c5387b914c83 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Wed, 1 May 2019 17:33:15 +0200 Subject: [PATCH 01/28] update README --- README.md | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 4171c4be8e..3e52b0e833 100644 --- a/README.md +++ b/README.md @@ -3,11 +3,13 @@ **An open-source analysis pipeline to detect germline or somatic variants from whole genome or targeted sequencing**. +> :warning: This pipeline is a work in progress being ported to nf-core from [SciLifeLab/Sarek](https://github/SciLifeLab/Sarek) + [![Nextflow version][nextflow-badge]](https://www.nextflow.io) [![Travis build status][travis-badge]](https://travis-ci.org/nf-core/sarek) [![Install with bioconda][bioconda-badge]](http://bioconda.github.io/) -[![Docker Container available][docker-badge]](https://hub.docker.com/r/nf-core/sarek) +[![Docker Container available][docker-badge]](https://hub.docker.com/r/nfcore/sarek) [![Join us on Slack][slack-badge]](https://nfcore.slack.com/messages/CGFUX04HZ/) @@ -136,8 +138,12 @@ For further information or help, don't hesitate to get in touch on [Slack](https [National Genomics Infrastructure logo][ngi-link] [National Bioinformatics Infrastructure Sweden logo][nbis-link] -[bioconda-badge]: 
https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg?style=popout&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAADEAAAAyCAYAAAD1CDOyAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAAN1wAADdcBQiibeAAAABl0RVh0U29mdHdhcmUAd3d3Lmlua3NjYXBlLm9yZ5vuPBoAAAa2SURBVGiBxZprjFVXFcd/e2aA6UAoYGrk0aZYKvXdwWAyoDyswRqhxmpNjQFrNOIHTR+aJhoxrWBoAz4aGz80bdWCsW1qq5IGSlvDQA1aUGw7BEuR4dFCrVSY0qFYYH5+WHt674zzOHdm7sw/uTn7nLP2Put/z9prr7X2SQwh1InASqAJmAyMBcYDbUA7cAR4HngOaAZ2ppTODqUOA4Jar16mTsjnU9THLIYT6j3qPDWNlPI/V29X31T3qV9Ux6tJ/WlBIp14Vl2m1lZb8Tnqwtz+XH54i7olt9eoreqMTOSOComo/kVtrIbyo9Ufqe3qWLVR3azuzg++LR9vzcfvq+/NRO4bAJEz6koLvpWaAgQmAVuAm4DtKaV2YBlwBfBIFuucnOOADmAKsCalJPDriv6xQB3wPeBx9YL+hPskoU4hvEhTvvRCPp7IfccBp4HZ+V4jsBeYASxXa4AVlXN4CwuBreqFfQn1SkJtAL4N7AG2AvuBV/LtscBh4FribSwANgMfBp4G/pRSOgzcCMwdBAmAy4Bt6rRBjtMV6i3qDdl+V+TjLfn4NUtu99QA5kNv2G2sQ/+HHn2zegmwBJgEzAcOAuuB4ymlHVmmFvgK8BFgFvBX4HJgaUrpWfVtwCjgVD5OA94DzMtjTx3A//cosCTPtd6hvl99PbPfpD6S283q17PMSnV2bjeoi8yutwjUWvXThnuuFDcWGXyz4Sr/mzvtVNfl9t1Z7ol8fldRxft43nL13xWQeMOwlF4H/WAWbM9E9ufz/cZCtifL3aduVScPhkTZc6dbWnOK4A99DTY/K38gC/9G/V1uH1NXZLkr1fOGgkDZsyeoT1ZAZF5Pg0xVP5oFHlbvVM+qe9QfG6vovqFUvAcdxqnPFSTxaPfO09WfGK7xP1nouLpK3WG4ytvsb1INDZFLy3ToCx3qzPKOt2alG9Ql6sYspGH7q9TvWu0Is6TPsoJv4wflnf6ZL35LPV+9X12oXmX4+2GFWmOE5v1hb2eHi/KFM+qasoHOM5KV76gb1DnDTGRJwbdxMeoX1O1G6FyrfsaYGzeUCR4wgrnhJJEsufi+cF0N8C8iWhwD3A6sBe4G7gDuyWM+kFLqGE4SObR4qIDoLOCtgK4j/14wXOxydZQReiyuqsa9QP1EgTexKakfB64DJgIX5t+EPM43iaTGlNKJESDxdsJS+sK+pL5KRKsALwOHgKNEmeUUsDqldKhqmvYD9SSRfPWGYxiVip5w1lh0BpOZDRrq4X7M6XQdkSfUAqOJ3HYUUJ+vTQSOjRiDQH8OJdUB19D1db1BVOqOAgeAjVVRrTjO7+f+63XA9UQhYAxB5gKiBNkIfAmYpLallI5XU9OeYKSj/ZFoQ61Tf9bNzl4zQpCp2SavHA6lu0NdUMDFPlkHfBZYRZjNHOBiYDuwDthG5MZNwKYR4FEk5d2LulQ9alQpGtSrjSrf/WVs9zgCBV+LZXvLO3OJThw0MqxLM5GPqavVv6vzh5lAEVNSnVmXUmpVXyJKKE8R5vM34DHgGeBVYCml6t9wEEjA6gKiL6aUnu/stCaz+oD6DXW9USzQiKXWGZHu+6qqfUY26SJYW95pprG/ME09lwVeU39hKRx+ybJ8o4oEphlztAgau3depl6bb/7RrpWHjca+wYtG5je6SgTq83OKoLmnAWoykXvV01mwLZ+fVA+pDxrZ3ga1fogJjFV/X5CA9rZ2GRWPTmyztPfWalT9Dlh6W09
YYO+gIIEpRlWlKLbam8tXZxt12HvVI7nDP9SncnujelPZYK+onx8kgWssPgc0agFdHEyXvDlXvK8HvkzET7uIvGIu0EJsoHTmHmeAPwMz1B+qCypQvFb9pLoNeBB4RwW8V6WUWrro3cMDRhHbW4kICmcBuzMZgV8SIfpB4GYikfoUsRFzCbG+PA60EtFwGxHmTyVK+/OBxQystN8MXJFSOtcniUykAfgQEbvUE3sPY4hUcTxwF7EgLiJ2iBYBDwNXD0CxotgPzEkp9ZeulqBOVH9leIynjZJ6u/pVY8+iQ91leLI31WcqsOtK8bI6Y0DUjVrUkW4DXmUpMPttPm6xemhV39WXnn0WxFJKu4md0R1llycD7yZs/fJ8rVop7HZgbkpp76BHMkL0Ow0TWm9EtRvyP1UNUzqnrjWczNDCCM13qjdbCkuah5jALrWpf20GR6RWfadRJdTSvBgsWoywp66qBHogs9j45qNtgIqfMCLlhQ6iYD0kKac6hsjDm4gqyXTgIqCBqKC0AScpfbTVQumjrXM9jVkJ/gfEGHquO3j8DQAAAABJRU5ErkJggg== -[docker-badge]: https://img.shields.io/docker/automated/nf-core/sarek.svg?style=popout&logo=docker -[nextflow-badge]: https://img.shields.io/badge/nextflow-%E2%89%A519.04.0-brightgreen.svg?style=popout&logo=data:image/svg+xml;base64,PD94bWwgdmVyc2lvbj0iMS4wIiBlbmNvZGluZz0iVVRGLTgiIHN0YW5kYWxvbmU9Im5vIj8+PHN2ZyAgIHhtbG5zOmRjPSJodHRwOi8vcHVybC5vcmcvZGMvZWxlbWVudHMvMS4xLyIgICB4bWxuczpjYz0iaHR0cDovL2NyZWF0aXZlY29tbW9ucy5vcmcvbnMjIiAgIHhtbG5zOnJkZj0iaHR0cDovL3d3dy53My5vcmcvMTk5OS8wMi8yMi1yZGYtc3ludGF4LW5zIyIgICB4bWxuczpzdmc9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIiAgIHhtbG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyIgICB4bWxuczpzb2RpcG9kaT0iaHR0cDovL3NvZGlwb2RpLnNvdXJjZWZvcmdlLm5ldC9EVEQvc29kaXBvZGktMC5kdGQiICAgeG1sbnM6aW5rc2NhcGU9Imh0dHA6Ly93d3cuaW5rc2NhcGUub3JnL25hbWVzcGFjZXMvaW5rc2NhcGUiICAgd2lkdGg9IjEyLjc5OTIyOG1tIiAgIGhlaWdodD0iMTIuODA0ODA5bW0iICAgdmlld0JveD0iMCAwIDQ1LjM1MTU5NCA0NS4zNzEzNjkiICAgaWQ9InN2Zzc2NTIiICAgdmVyc2lvbj0iMS4xIiAgIGlua3NjYXBlOnZlcnNpb249IjAuOTEgcjEzNzI1IiAgIHNvZGlwb2RpOmRvY25hbWU9Im5leHRmbG93LWZhdmljb24td2hpdGUuc3ZnIj4gIDxkZWZzICAgICBpZD0iZGVmczc2NTQiIC8+ICA8c29kaXBvZGk6bmFtZWR2aWV3ICAgICBpZD0iYmFzZSIgICAgIHBhZ2Vjb2xvcj0iI2ZmZmZmZiIgICAgIGJvcmRlcmNvbG9yPSIjNjY2NjY2IiAgICAgYm9yZGVyb3BhY2l0eT0iMS4wIiAgICAgaW5rc2NhcGU6cGFnZW9wYWNpdHk9IjAuMCIgICAgIGlua3NjYXBlOnBhZ2VzaGFkb3c9IjIiICAgICBpbmtzY2FwZTp6b29tPSI3LjkxOTU5NTkiICAgICBpbmtzY2FwZTpjeD0iMjAuMTEzMjM1IiAgICAgaW5rc2NhcGU6Y3k9IjIzLjE2Mzkw
OCIgICAgIGlua3NjYXBlOmRvY3VtZW50LXVuaXRzPSJweCIgICAgIGlua3NjYXBlOmN1cnJlbnQtbGF5ZXI9ImxheWVyMSIgICAgIHNob3dncmlkPSJmYWxzZSIgICAgIGZpdC1tYXJnaW4tdG9wPSIwIiAgICAgZml0LW1hcmdpbi1sZWZ0PSIwIiAgICAgZml0LW1hcmdpbi1yaWdodD0iMCIgICAgIGZpdC1tYXJnaW4tYm90dG9tPSIwIiAgICAgaW5rc2NhcGU6d2luZG93LXdpZHRoPSIxOTIwIiAgICAgaW5rc2NhcGU6d2luZG93LWhlaWdodD0iMTAxNSIgICAgIGlua3NjYXBlOndpbmRvdy14PSIwIiAgICAgaW5rc2NhcGU6d2luZG93LXk9IjAiICAgICBpbmtzY2FwZTp3aW5kb3ctbWF4aW1pemVkPSIxIiAvPiAgPG1ldGFkYXRhICAgICBpZD0ibWV0YWRhdGE3NjU3Ij4gICAgPHJkZjpSREY+ICAgICAgPGNjOldvcmsgICAgICAgICByZGY6YWJvdXQ9IiI+ICAgICAgICA8ZGM6Zm9ybWF0PmltYWdlL3N2Zyt4bWw8L2RjOmZvcm1hdD4gICAgICAgIDxkYzp0eXBlICAgICAgICAgICByZGY6cmVzb3VyY2U9Imh0dHA6Ly9wdXJsLm9yZy9kYy9kY21pdHlwZS9TdGlsbEltYWdlIiAvPiAgICAgICAgPGRjOnRpdGxlPjwvZGM6dGl0bGU+ICAgICAgPC9jYzpXb3JrPiAgICA8L3JkZjpSREY+ICA8L21ldGFkYXRhPiAgPGcgICAgIGlua3NjYXBlOmxhYmVsPSJMYXllciAxIiAgICAgaW5rc2NhcGU6Z3JvdXBtb2RlPSJsYXllciIgICAgIGlkPSJsYXllcjEiICAgICB0cmFuc2Zvcm09InRyYW5zbGF0ZSgxMTQuMTA0MzcsLTQ1Mi41MzM2NikiPiAgICA8cGF0aCAgICAgICBzdHlsZT0iZmlsbDojZmZmZmZmIiAgICAgICBkPSJtIC0xMTQuMTA0MzcsNDU1LjQ2NTYyIDAsOC44NjEzMyAwLjIwMzEzLDAuMDYwNSBjIDMuODcyOTMsMS4xMzk0MyA4LjY1MjUxLDQuMzgzMiAxMi44MDA3OCw4LjY4NzUgMC45MTM2MywwLjk0ODAxIDEuOTcyNTY0LDIuMTA2ODQgMi4zNTM1MjQsMi41NzYxOCBsIDAuNjkxNCwwLjg1MzUxIC0wLjg2OTE0LDAuNzc1MzkgYyAtNC4xOTk5MDQsMy43NDE5MyAtOC45NzE5MDQsNi43NjYzNyAtMTQuMTA1NDc0LDguOTQxNDEgLTAuMzA5NzUsMC4xMzEyNCAtMC42OTcyMiwwLjI4MTIzIC0xLjA3NDIyLDAuNDI3NzMgbCAwLDkuMzA0NjkgYyAyLjY1OTkzLC0wLjg3NzkyIDUuMzA2MzksLTEuOTc1IDguMDYwNTUsLTMuMzUxNTYgNC4yNTYyMywtMi4xMjczMiA3LjU0MzI1NCwtNC4yNTc2NCAxMS4wMzcxMTQsLTcuMTU2MjUgMC45MjU4MSwtMC43NjgwOCAxLjgyMTA5LC0xLjUwNzAyIDEuOTkwMjMsLTEuNjQyNTggMC4yNzkzMSwtMC4yMjM4NCAwLjQ5MzMyLC0wLjA1MTQgMi4zMjQyMiwxLjg3ODkxIDYuMjIyNjUsNi41NjA0MSAxMy43ODMzNywxMC43NDQ0MyAyMS45Mzk0NiwxMi4yMjI2NSBsIDAsLTguOTQxNCBjIC00Ljc5NTM3LC0xLjE5NTkgLTkuNDIwMzEsLTMuNjQ1MTEgLTEzLjI1NzgyLC03LjA2NDQ2IC0xLjY4MzUxLC0xLjUwMDA2IC00LjI4NjgxLC00LjM1MDA5IC00LjM5MjU4LC00LjgwODU5IC0wLjA2ODYsLTAuMjk3MyA1
LjQ3NDgsLTUuNzA5NzcgNy4yOTQ5MywtNy4xMjMwNSAzLjQ4MjczLC0yLjcwNDI0IDYuNTg4MjUsLTQuMTIwNDIgMTAuMjc1MzksLTQuNjg1NTQgMC4wMjc1LC0wLjAwNCAwLjA1MjcsLTAuMDA4IDAuMDgwMSwtMC4wMTE3IGwgMCwtOC43NSBjIC03LjkzOTI3LDIuMDIxMTQgLTE0Ljg3MDAxLDUuODc3MzggLTIxLjUsMTEuOTQzMzYgbCAtMS42MzA4NiwxLjQ5MjE4IC0yLjk5NjEsLTMuMDA3ODEgYyAtMS42NDc1NiwtMS42NTQ3IC0zLjc0MDI1LC0zLjYwMTU3IC00LjY1MjM0LC00LjMyNjE3IC01LjAwODU1NCwtMy45Nzg5OSAtMTAuMTUyOTU0LC02LjQ5OTIzIC0xNC41NzIyNzQsLTcuMTU2MjUgeiIgICAgICAgaWQ9InBhdGg3NjIwIiAgICAgICBpbmtzY2FwZTpjb25uZWN0b3ItY3VydmF0dXJlPSIwIiAgICAgICBzb2RpcG9kaTpub2RldHlwZXM9ImNjY3NjY2NzY2Nzc2NzY2NzY3NjY2NjY2NzYyIgLz4gIDwvZz48L3N2Zz4= -[travis-badge]: https://img.shields.io/travis/nf-core/sarek.svg?style=popout&logo=travis -[slack-badge]: https://img.shields.io/badge/slack-nfcore/sarek-blue.svg?style=popout&logo=slack +[bioconda-badge]: https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg?logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAADEAAAAyCAYAAAD1CDOyAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAAN1wAADdcBQiibeAAAABl0RVh0U29mdHdhcmUAd3d3Lmlua3NjYXBlLm9yZ5vuPBoAAAa2SURBVGiBxZprjFVXFcd/e2aA6UAoYGrk0aZYKvXdwWAyoDyswRqhxmpNjQFrNOIHTR+aJhoxrWBoAz4aGz80bdWCsW1qq5IGSlvDQA1aUGw7BEuR4dFCrVSY0qFYYH5+WHt674zzOHdm7sw/uTn7nLP2Put/z9prr7X2SQwh1InASqAJmAyMBcYDbUA7cAR4HngOaAZ2ppTODqUOA4Jar16mTsjnU9THLIYT6j3qPDWNlPI/V29X31T3qV9Ux6tJ/WlBIp14Vl2m1lZb8Tnqwtz+XH54i7olt9eoreqMTOSOComo/kVtrIbyo9Ufqe3qWLVR3azuzg++LR9vzcfvq+/NRO4bAJEz6koLvpWaAgQmAVuAm4DtKaV2YBlwBfBIFuucnOOADmAKsCalJPDriv6xQB3wPeBx9YL+hPskoU4hvEhTvvRCPp7IfccBp4HZ+V4jsBeYASxXa4AVlXN4CwuBreqFfQn1SkJtAL4N7AG2AvuBV/LtscBh4FribSwANgMfBp4G/pRSOgzcCMwdBAmAy4Bt6rRBjtMV6i3qDdl+V+TjLfn4NUtu99QA5kNv2G2sQ/+HHn2zegmwBJgEzAcOAuuB4ymlHVmmFvgK8BFgFvBX4HJgaUrpWfVtwCjgVD5OA94DzMtjTx3A//cosCTPtd6hvl99PbPfpD6S283q17PMSnV2bjeoi8yutwjUWvXThnuuFDcWGXyz4Sr/mzvtVNfl9t1Z7ol8fldRxft43nL13xWQeMOwlF4H/WAWbM9E9ufz/cZCtifL3aduVScPhkTZc6dbWnOK4A99DTY/K38gC/9G/V1uH1NXZLkr1fOGgkDZsyeoT1ZAZF5Pg0xVP5oFHlbvVM+qe9QfG6vovqFUvAcdxqnPFSTxaPfO09WfGK7xP1nouLpK3WG4ytvsb
1INDZFLy3ToCx3qzPKOt2alG9Ql6sYspGH7q9TvWu0Is6TPsoJv4wflnf6ZL35LPV+9X12oXmX4+2GFWmOE5v1hb2eHi/KFM+qasoHOM5KV76gb1DnDTGRJwbdxMeoX1O1G6FyrfsaYGzeUCR4wgrnhJJEsufi+cF0N8C8iWhwD3A6sBe4G7gDuyWM+kFLqGE4SObR4qIDoLOCtgK4j/14wXOxydZQReiyuqsa9QP1EgTexKakfB64DJgIX5t+EPM43iaTGlNKJESDxdsJS+sK+pL5KRKsALwOHgKNEmeUUsDqldKhqmvYD9SSRfPWGYxiVip5w1lh0BpOZDRrq4X7M6XQdkSfUAqOJ3HYUUJ+vTQSOjRiDQH8OJdUB19D1db1BVOqOAgeAjVVRrTjO7+f+63XA9UQhYAxB5gKiBNkIfAmYpLallI5XU9OeYKSj/ZFoQ61Tf9bNzl4zQpCp2SavHA6lu0NdUMDFPlkHfBZYRZjNHOBiYDuwDthG5MZNwKYR4FEk5d2LulQ9alQpGtSrjSrf/WVs9zgCBV+LZXvLO3OJThw0MqxLM5GPqavVv6vzh5lAEVNSnVmXUmpVXyJKKE8R5vM34DHgGeBVYCml6t9wEEjA6gKiL6aUnu/stCaz+oD6DXW9USzQiKXWGZHu+6qqfUY26SJYW95pprG/ME09lwVeU39hKRx+ybJ8o4oEphlztAgau3depl6bb/7RrpWHjca+wYtG5je6SgTq83OKoLmnAWoykXvV01mwLZ+fVA+pDxrZ3ga1fogJjFV/X5CA9rZ2GRWPTmyztPfWalT9Dlh6W09YYO+gIIEpRlWlKLbam8tXZxt12HvVI7nDP9SncnujelPZYK+onx8kgWssPgc0agFdHEyXvDlXvK8HvkzET7uIvGIu0EJsoHTmHmeAPwMz1B+qCypQvFb9pLoNeBB4RwW8V6WUWrro3cMDRhHbW4kICmcBuzMZgV8SIfpB4GYikfoUsRFzCbG+PA60EtFwGxHmTyVK+/OBxQystN8MXJFSOtcniUykAfgQEbvUE3sPY4hUcTxwF7EgLiJ2iBYBDwNXD0CxotgPzEkp9ZeulqBOVH9leIynjZJ6u/pVY8+iQ91leLI31WcqsOtK8bI6Y0DUjVrUkW4DXmUpMPttPm6xemhV39WXnn0WxFJKu4md0R1llycD7yZs/fJ8rVop7HZgbkpp76BHMkL0Ow0TWm9EtRvyP1UNUzqnrjWczNDCCM13qjdbCkuah5jALrWpf20GR6RWfadRJdTSvBgsWoywp66qBHogs9j45qNtgIqfMCLlhQ6iYD0kKac6hsjDm4gqyXTgIqCBqKC0AScpfbTVQumjrXM9jVkJ/gfEGHquO3j8DQAAAABJRU5ErkJggg== +[btb-link]: https://ki.se/forskning/barntumorbanken-0 +[docker-badge]: https://img.shields.io/docker/automated/nfcore/sarek.svg?logo=docker +[nbis-link]: https://nbis.se +[nextflow-badge]: 
https://img.shields.io/badge/nextflow-%E2%89%A519.04.0-brightgreen.svg?logo=data:image/svg+xml;base64,PD94bWwgdmVyc2lvbj0iMS4wIiBlbmNvZGluZz0iVVRGLTgiIHN0YW5kYWxvbmU9Im5vIj8+PHN2ZyAgIHhtbG5zOmRjPSJodHRwOi8vcHVybC5vcmcvZGMvZWxlbWVudHMvMS4xLyIgICB4bWxuczpjYz0iaHR0cDovL2NyZWF0aXZlY29tbW9ucy5vcmcvbnMjIiAgIHhtbG5zOnJkZj0iaHR0cDovL3d3dy53My5vcmcvMTk5OS8wMi8yMi1yZGYtc3ludGF4LW5zIyIgICB4bWxuczpzdmc9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIiAgIHhtbG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyIgICB4bWxuczpzb2RpcG9kaT0iaHR0cDovL3NvZGlwb2RpLnNvdXJjZWZvcmdlLm5ldC9EVEQvc29kaXBvZGktMC5kdGQiICAgeG1sbnM6aW5rc2NhcGU9Imh0dHA6Ly93d3cuaW5rc2NhcGUub3JnL25hbWVzcGFjZXMvaW5rc2NhcGUiICAgd2lkdGg9IjEyLjc5OTIyOG1tIiAgIGhlaWdodD0iMTIuODA0ODA5bW0iICAgdmlld0JveD0iMCAwIDQ1LjM1MTU5NCA0NS4zNzEzNjkiICAgaWQ9InN2Zzc2NTIiICAgdmVyc2lvbj0iMS4xIiAgIGlua3NjYXBlOnZlcnNpb249IjAuOTEgcjEzNzI1IiAgIHNvZGlwb2RpOmRvY25hbWU9Im5leHRmbG93LWZhdmljb24td2hpdGUuc3ZnIj4gIDxkZWZzICAgICBpZD0iZGVmczc2NTQiIC8+ICA8c29kaXBvZGk6bmFtZWR2aWV3ICAgICBpZD0iYmFzZSIgICAgIHBhZ2Vjb2xvcj0iI2ZmZmZmZiIgICAgIGJvcmRlcmNvbG9yPSIjNjY2NjY2IiAgICAgYm9yZGVyb3BhY2l0eT0iMS4wIiAgICAgaW5rc2NhcGU6cGFnZW9wYWNpdHk9IjAuMCIgICAgIGlua3NjYXBlOnBhZ2VzaGFkb3c9IjIiICAgICBpbmtzY2FwZTp6b29tPSI3LjkxOTU5NTkiICAgICBpbmtzY2FwZTpjeD0iMjAuMTEzMjM1IiAgICAgaW5rc2NhcGU6Y3k9IjIzLjE2MzkwOCIgICAgIGlua3NjYXBlOmRvY3VtZW50LXVuaXRzPSJweCIgICAgIGlua3NjYXBlOmN1cnJlbnQtbGF5ZXI9ImxheWVyMSIgICAgIHNob3dncmlkPSJmYWxzZSIgICAgIGZpdC1tYXJnaW4tdG9wPSIwIiAgICAgZml0LW1hcmdpbi1sZWZ0PSIwIiAgICAgZml0LW1hcmdpbi1yaWdodD0iMCIgICAgIGZpdC1tYXJnaW4tYm90dG9tPSIwIiAgICAgaW5rc2NhcGU6d2luZG93LXdpZHRoPSIxOTIwIiAgICAgaW5rc2NhcGU6d2luZG93LWhlaWdodD0iMTAxNSIgICAgIGlua3NjYXBlOndpbmRvdy14PSIwIiAgICAgaW5rc2NhcGU6d2luZG93LXk9IjAiICAgICBpbmtzY2FwZTp3aW5kb3ctbWF4aW1pemVkPSIxIiAvPiAgPG1ldGFkYXRhICAgICBpZD0ibWV0YWRhdGE3NjU3Ij4gICAgPHJkZjpSREY+ICAgICAgPGNjOldvcmsgICAgICAgICByZGY6YWJvdXQ9IiI+ICAgICAgICA8ZGM6Zm9ybWF0PmltYWdlL3N2Zyt4bWw8L2RjOmZvcm1hdD4gICAgICAgIDxkYzp0eXBlICAgICAgICAgICByZGY6cmVzb3VyY2U9Imh0dHA6Ly9wdXJsLm9yZy9kYy
9kY21pdHlwZS9TdGlsbEltYWdlIiAvPiAgICAgICAgPGRjOnRpdGxlPjwvZGM6dGl0bGU+ICAgICAgPC9jYzpXb3JrPiAgICA8L3JkZjpSREY+ICA8L21ldGFkYXRhPiAgPGcgICAgIGlua3NjYXBlOmxhYmVsPSJMYXllciAxIiAgICAgaW5rc2NhcGU6Z3JvdXBtb2RlPSJsYXllciIgICAgIGlkPSJsYXllcjEiICAgICB0cmFuc2Zvcm09InRyYW5zbGF0ZSgxMTQuMTA0MzcsLTQ1Mi41MzM2NikiPiAgICA8cGF0aCAgICAgICBzdHlsZT0iZmlsbDojZmZmZmZmIiAgICAgICBkPSJtIC0xMTQuMTA0MzcsNDU1LjQ2NTYyIDAsOC44NjEzMyAwLjIwMzEzLDAuMDYwNSBjIDMuODcyOTMsMS4xMzk0MyA4LjY1MjUxLDQuMzgzMiAxMi44MDA3OCw4LjY4NzUgMC45MTM2MywwLjk0ODAxIDEuOTcyNTY0LDIuMTA2ODQgMi4zNTM1MjQsMi41NzYxOCBsIDAuNjkxNCwwLjg1MzUxIC0wLjg2OTE0LDAuNzc1MzkgYyAtNC4xOTk5MDQsMy43NDE5MyAtOC45NzE5MDQsNi43NjYzNyAtMTQuMTA1NDc0LDguOTQxNDEgLTAuMzA5NzUsMC4xMzEyNCAtMC42OTcyMiwwLjI4MTIzIC0xLjA3NDIyLDAuNDI3NzMgbCAwLDkuMzA0NjkgYyAyLjY1OTkzLC0wLjg3NzkyIDUuMzA2MzksLTEuOTc1IDguMDYwNTUsLTMuMzUxNTYgNC4yNTYyMywtMi4xMjczMiA3LjU0MzI1NCwtNC4yNTc2NCAxMS4wMzcxMTQsLTcuMTU2MjUgMC45MjU4MSwtMC43NjgwOCAxLjgyMTA5LC0xLjUwNzAyIDEuOTkwMjMsLTEuNjQyNTggMC4yNzkzMSwtMC4yMjM4NCAwLjQ5MzMyLC0wLjA1MTQgMi4zMjQyMiwxLjg3ODkxIDYuMjIyNjUsNi41NjA0MSAxMy43ODMzNywxMC43NDQ0MyAyMS45Mzk0NiwxMi4yMjI2NSBsIDAsLTguOTQxNCBjIC00Ljc5NTM3LC0xLjE5NTkgLTkuNDIwMzEsLTMuNjQ1MTEgLTEzLjI1NzgyLC03LjA2NDQ2IC0xLjY4MzUxLC0xLjUwMDA2IC00LjI4NjgxLC00LjM1MDA5IC00LjM5MjU4LC00LjgwODU5IC0wLjA2ODYsLTAuMjk3MyA1LjQ3NDgsLTUuNzA5NzcgNy4yOTQ5MywtNy4xMjMwNSAzLjQ4MjczLC0yLjcwNDI0IDYuNTg4MjUsLTQuMTIwNDIgMTAuMjc1MzksLTQuNjg1NTQgMC4wMjc1LC0wLjAwNCAwLjA1MjcsLTAuMDA4IDAuMDgwMSwtMC4wMTE3IGwgMCwtOC43NSBjIC03LjkzOTI3LDIuMDIxMTQgLTE0Ljg3MDAxLDUuODc3MzggLTIxLjUsMTEuOTQzMzYgbCAtMS42MzA4NiwxLjQ5MjE4IC0yLjk5NjEsLTMuMDA3ODEgYyAtMS42NDc1NiwtMS42NTQ3IC0zLjc0MDI1LC0zLjYwMTU3IC00LjY1MjM0LC00LjMyNjE3IC01LjAwODU1NCwtMy45Nzg5OSAtMTAuMTUyOTU0LC02LjQ5OTIzIC0xNC41NzIyNzQsLTcuMTU2MjUgeiIgICAgICAgaWQ9InBhdGg3NjIwIiAgICAgICBpbmtzY2FwZTpjb25uZWN0b3ItY3VydmF0dXJlPSIwIiAgICAgICBzb2RpcG9kaTpub2RldHlwZXM9ImNjY3NjY2NzY2Nzc2NzY2NzY3NjY2NjY2NzYyIgLz4gIDwvZz48L3N2Zz4= +[ngi-link]: https://ngisweden.scilifelab.se/ +[scilifelab-link]: 
https://scilifelab.se +[slack-badge]: https://img.shields.io/badge/slack-nfcore/sarek-blue.svg?logo=slack +[travis-badge]: https://img.shields.io/travis/nf-core/sarek.svg?logo=travis From 961856cc3b5fc0b2826a39fc2306b4526b9c2db5 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Wed, 1 May 2019 17:55:13 +0200 Subject: [PATCH 02/28] update .travis.yml --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index fa72533100..e736da2a64 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,10 +11,10 @@ before_install: # PRs to master are only ok if coming from dev branch - '[ $TRAVIS_PULL_REQUEST = "false" ] || [ $TRAVIS_BRANCH != "master" ] || ([ $TRAVIS_PULL_REQUEST_SLUG = $TRAVIS_REPO_SLUG ] && [ $TRAVIS_PULL_REQUEST_BRANCH = "dev" ])' # Pull the docker image first so the test doesn't wait for this - - docker pull maxulysse/sarek:dev + - docker pull nfcore/sarek:dev # Fake the tag locally so that the pipeline runs properly # Looks weird when this is :dev to :dev, but makes sense when testing code for a release (:dev to :1.0.1) - - docker tag maxulysse/sarek:dev maxulysse/sarek:dev + - docker tag nfcore/sarek:dev nfcore/sarek:dev install: # Install Nextflow From cefd329e686e001216b9ce5cd72ea2401108c7c4 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Wed, 1 May 2019 17:55:24 +0200 Subject: [PATCH 03/28] add Jenkinsfile --- Jenkinsfile | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 Jenkinsfile diff --git a/Jenkinsfile b/Jenkinsfile new file mode 100644 index 0000000000..b54e5c7deb --- /dev/null +++ b/Jenkinsfile @@ -0,0 +1,35 @@ +pipeline { + agent any + + environment { + JENKINS_API = credentials('api') + } + + stages { + stage('Setup environment') { + steps { + sh "docker pull nfcore/sarek:dev" + } + } + stage('Build') { + steps { + sh "git clone --single-branch --branch sarek https://github.com/nf-core/test-datasets.git test-data" + sh "nextflow run build.nf -profile docker 
--genome smallGRCh37 --refdir test-data/reference --outdir References" + } + } + stage('Test') { + steps { + sh "nextflow run main.nf -profile docker --help" + } + } + } + + post { + failure { + script { + def response = sh(script: "curl -u ${JENKINS_API_USR}:${JENKINS_API_PSW} ${BUILD_URL}/consoleText", returnStdout: true).trim().replace('\n', '
') def comment = pullRequest.comment("## :rotating_light: Build log output:
${response}
") + } + } + } +} From 1d3c120f24a6917ab914fd0254aa2fdd25e75772 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Wed, 1 May 2019 17:58:00 +0200 Subject: [PATCH 04/28] update conda environment file --- environment.yml | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/environment.yml b/environment.yml index 4e6811ec55..dcdbe97d7d 100644 --- a/environment.yml +++ b/environment.yml @@ -6,6 +6,25 @@ channels: - bioconda - defaults dependencies: - # TODO nf-core: Add required software dependencies here + - r-rcolorbrewer=1.1 + - r-base=3.5.1 + - bcftools=1.9 + - bioconductor-rtracklayer=1.42.1 + - bwa=0.7.17 + - cancerit-allelecount=2.1.2 + - control-freec=11.4 + - ensembl-vep=96.0 - fastqc=0.11.8 + - freebayes=1.2.0 + - gatk4=4.1.1.0 + - genesplicer=1.0 + - htslib=1.9 + - igvtools=2.3.93 + - manta=1.5.0 - multiqc=1.7 + - qualimap=2.2.2b + - samtools=1.9 + - snpeff=4.3.1t + - strelka=2.9.10 + - vcfanno=0.3.1 + - vcftools=0.1.16 From 95d55117eb19712f1e173b10f977686ab7df4fbf Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Thu, 2 May 2019 09:49:07 +0200 Subject: [PATCH 05/28] update software_version collect --- bin/scrape_software_versions.py | 36 +++++++++++++++++++++++++++++++-- main.nf | 26 ++++++++++++++++-------- 2 files changed, 52 insertions(+), 10 deletions(-) diff --git a/bin/scrape_software_versions.py b/bin/scrape_software_versions.py index 0cb269ad8c..599043c4ae 100755 --- a/bin/scrape_software_versions.py +++ b/bin/scrape_software_versions.py @@ -5,16 +5,48 @@ # TODO nf-core: Add additional regexes for new tools in process get_software_versions regexes = { - 'nf-core/sarek': ['v_pipeline.txt', r"(\S+)"], - 'Nextflow': ['v_nextflow.txt', r"(\S+)"], + 'AlleleCount': ['v_allelecount.txt', r"(\S+)"], + 'ASCAT': ['v_ascat.txt', r"(\d\.\d+)"], + 'bcftools': ['v_bcftools.txt', r"bcftools (\S+)"], + 'BWA': ['v_bwa.txt', r"Version: (\S+)"], 'FastQC': ['v_fastqc.txt', r"FastQC v(\S+)"], + 'FreeBayes': ['v_freebayes.txt', r"version: 
v(\d\.\d\.\d+)"], + 'GATK': ['v_gatk.txt', r"Version:(\S+)"], + 'htslib': ['v_samtools.txt', r"htslib (\S+)"], + 'Manta': ['v_manta.txt', r"([0-9.]+)"], 'MultiQC': ['v_multiqc.txt', r"multiqc, version (\S+)"], + 'Nextflow': ['v_nextflow.txt', r"(\S+)"], + 'nf-core/sarek': ['v_pipeline.txt', r"(\S+)"], + 'Picard': ['v_picard.txt', r"Picard version:(\d\.\d\.\d+)"], + 'Qualimap': ['v_qualimap.txt', r"QualiMap v.(\S+)"], + 'R': ['v_r.txt', r"R version (\S+)"], + 'samtools': ['v_samtools.txt', r"samtools (\S+)"], + 'SnpEff': ['v_snpeff.txt', r"version SnpEff (\S+)"], + 'Strelka': ['v_strelka.txt', r"([0-9.]+)"], + 'vcftools': ['v_vcftools.txt', r"([0-9.]+)"], + 'VEP': ['v_vep.txt', r"ensembl-vep : (\S+)"], } results = OrderedDict() results['nf-core/sarek'] = 'N/A' results['Nextflow'] = 'N/A' +results['AlleleCount'] = 'N/A' +results['ASCAT'] = 'N/A' +results['bcftools'] = 'N/A' +results['BWA'] = 'N/A' results['FastQC'] = 'N/A' +results['FreeBayes'] = 'N/A' +results['GATK'] = 'N/A' +results['htslib'] = 'N/A' +results['Manta'] = 'N/A' results['MultiQC'] = 'N/A' +results['Picard'] = 'N/A' +results['Qualimap'] = 'N/A' +results['R'] = 'N/A' +results['samtools'] = 'N/A' +results['SnpEff'] = 'N/A' +results['Strelka'] = 'N/A' +results['vcftools'] = 'N/A' +results['VEP'] = 'N/A' # Search each file using its regex for k, v in regexes.items(): diff --git a/main.nf b/main.nf index dc781271f2..3b8687f20c 100644 --- a/main.nf +++ b/main.nf @@ -20,16 +20,15 @@ def helpMessage() { The typical command for running the pipeline is as follows: - nextflow run nf-core/sarek --reads '*_R{1,2}.fastq.gz' -profile docker + nextflow run nf-core/sarek --sample sample.tsv -profile docker Mandatory arguments: - --reads Path to input data (must be surrounded with quotes) + --sample Path to TSV input file -profile Configuration profile to use. Can use multiple (comma separated) Available: conda, docker, singularity, awsbatch, test and more. 
Options: --genome Name of iGenomes reference - --singleEnd Specifies that the input is single end reads References If not specified in the configuration file or you wish to overwrite any of the references. --fasta Path to Fasta reference @@ -176,12 +175,23 @@ process get_software_versions { file "software_versions.csv" script: - // TODO nf-core: Get all tools to print their version number here """ - echo $workflow.manifest.version > v_pipeline.txt - echo $workflow.nextflow.version > v_nextflow.txt - fastqc --version > v_fastqc.txt - multiqc --version > v_multiqc.txt + bcftools version > v_bcftools.txt 2>&1 || true + bwa &> v_bwa.txt 2>&1 || true + configManta.py --version > v_manta.txt 2>&1 || true + configureStrelkaGermlineWorkflow.py --version > v_strelka.txt 2>&1 || true + echo "${workflow.manifest.version}" &> v_pipeline.txt 2>&1 || true + echo "${workflow.nextflow.version}" &> v_nextflow.txt 2>&1 || true + echo "SNPEFF version"\$(snpEff -h 2>&1) > v_snpeff.txt + fastqc --version > v_fastqc.txt 2>&1 || true + freebayes --version > v_freebayes.txt 2>&1 || true + gatk ApplyBQSR --help 2>&1 | grep Version: > v_gatk.txt 2>&1 || true + multiqc --version &> v_multiqc.txt 2>&1 || true + qualimap --version &> v_qualimap.txt 2>&1 || true + samtools --version &> v_samtools.txt 2>&1 || true + vcftools --version &> v_vcftools.txt 2>&1 || true + vep --help &> v_vep.txt 2>&1 || true + scrape_software_versions.py &> software_versions_mqc.yaml """ } From e21ce880e2c1a8cc9e814638b59a8411ae28dfb2 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Thu, 2 May 2019 10:09:37 +0200 Subject: [PATCH 06/28] trying to fix travis CI --- .travis.yml | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index e736da2a64..84b9cecb5d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,18 +2,27 @@ sudo: required language: python jdk: openjdk8 services: docker + +addons: + apt: + update: true + python: '3.6' cache: pip + matrix: fast_finish: 
true +env: + - NXF_VER=19.04.0 + - NXF_VER='' + before_install: # PRs to master are only ok if coming from dev branch - '[ $TRAVIS_PULL_REQUEST = "false" ] || [ $TRAVIS_BRANCH != "master" ] || ([ $TRAVIS_PULL_REQUEST_SLUG = $TRAVIS_REPO_SLUG ] && [ $TRAVIS_PULL_REQUEST_BRANCH = "dev" ])' # Pull the docker image first so the test doesn't wait for this - docker pull nfcore/sarek:dev # Fake the tag locally so that the pipeline runs properly - # Looks weird when this is :dev to :dev, but makes sense when testing code for a release (:dev to :1.0.1) - docker tag nfcore/sarek:dev nfcore/sarek:dev install: @@ -24,12 +33,13 @@ install: # Install nf-core/tools - pip install --upgrade pip - pip install nf-core + # Install Conda + - wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh + - bash Miniconda3-latest-Linux-x86_64.sh -b -f -p $HOME/miniconda + - export PATH="$HOME/miniconda/bin:$PATH" # Reset - mkdir ${TRAVIS_BUILD_DIR}/tests && cd ${TRAVIS_BUILD_DIR}/tests -env: - - NXF_VER=19.04.0 - jobs: include: - stage: lint From 00c7704478fa0b4e492ea675179be15c1a5cf7f8 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Thu, 2 May 2019 10:14:47 +0200 Subject: [PATCH 07/28] only one env --- .travis.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 84b9cecb5d..1c6997caab 100644 --- a/.travis.yml +++ b/.travis.yml @@ -15,7 +15,6 @@ matrix: env: - NXF_VER=19.04.0 - - NXF_VER='' before_install: # PRs to master are only ok if coming from dev branch From 3be2f63cbfef5e64cd4fe077bb1c8d51d557d5bf Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Thu, 2 May 2019 10:20:11 +0200 Subject: [PATCH 08/28] remove pip upgrade --- .travis.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 1c6997caab..03be6585db 100644 --- a/.travis.yml +++ b/.travis.yml @@ -30,7 +30,6 @@ install: - wget -qO- get.nextflow.io | bash - sudo ln -s /tmp/nextflow/nextflow /usr/local/bin/nextflow # Install nf-core/tools - - pip install 
--upgrade pip - pip install nf-core # Install Conda - wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh From 3cd9748c63e68519853c195aa750668e25e46e7b Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Thu, 2 May 2019 10:28:26 +0200 Subject: [PATCH 09/28] remove lint --- .travis.yml | 9 --------- 1 file changed, 9 deletions(-) diff --git a/.travis.yml b/.travis.yml index 03be6585db..03cf98bb46 100644 --- a/.travis.yml +++ b/.travis.yml @@ -29,21 +29,12 @@ install: - mkdir /tmp/nextflow && cd /tmp/nextflow - wget -qO- get.nextflow.io | bash - sudo ln -s /tmp/nextflow/nextflow /usr/local/bin/nextflow - # Install nf-core/tools - - pip install nf-core - # Install Conda - - wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh - - bash Miniconda3-latest-Linux-x86_64.sh -b -f -p $HOME/miniconda - - export PATH="$HOME/miniconda/bin:$PATH" # Reset - mkdir ${TRAVIS_BUILD_DIR}/tests && cd ${TRAVIS_BUILD_DIR}/tests jobs: include: - - stage: lint - script: nf-core lint ${TRAVIS_BUILD_DIR} - stage: built - script: skip script: git clone --single-branch --branch sarek https://github.com/nf-core/test-datasets.git test-data script: nextflow run ${TRAVIS_BUILD_DIR}/build.nf -profile docker --genome smallGRCh37 --refdir test-data/reference --outdir References - stage: test From 861577cde5d285f8c27fc608ec88e0a5895937b0 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Thu, 2 May 2019 10:40:42 +0200 Subject: [PATCH 10/28] remove lib/SarekUtils.groovy --- lib/SarekUtils.groovy | 41 ------------------------------------ main.nf | 49 ++++++++++++++++++++++++++++++++++++------- 2 files changed, 42 insertions(+), 48 deletions(-) delete mode 100644 lib/SarekUtils.groovy diff --git a/lib/SarekUtils.groovy b/lib/SarekUtils.groovy deleted file mode 100644 index b1a90bfcaa..0000000000 --- a/lib/SarekUtils.groovy +++ /dev/null @@ -1,41 +0,0 @@ -import static nextflow.Nextflow.file -import nextflow.Channel - -class SarekUtils { - - // Check parameter existence 
- static def checkParameterExistence(it, list) { - if (!list.contains(it)) { - println("Unknown parameter: ${it}") - return false - } - return true - } - - // Compare each parameter with a list of parameters - static def checkParameterList(list, realList) { - return list.every{ checkParameterExistence(it, realList) } - } - - // Loop through all the references files to check their existence - static def checkReferenceMap(referenceMap) { - referenceMap.every { - referenceFile, fileToCheck -> - SarekUtils.checkRefExistence(referenceFile, fileToCheck) - } - } - - // Loop through all the references files to check their existence - static def checkRefExistence(referenceFile, fileToCheck) { - if (fileToCheck instanceof List) return fileToCheck.every{ SarekUtils.checkRefExistence(referenceFile, it) } - def f = file(fileToCheck) - // this is an expanded wildcard: we can assume all files exist - if (f instanceof List && f.size() > 0) return true - else if (!f.exists()) { - println "Missing references: ${referenceFile} ${fileToCheck}" - return false - } - return true - } - -} diff --git a/main.nf b/main.nf index 3b8687f20c..c39a1353ea 100644 --- a/main.nf +++ b/main.nf @@ -63,17 +63,17 @@ if (params.genomes && params.genome && !params.genomes.containsKey(params.genome stepList = defineStepList() step = params.step ? params.step.toLowerCase() : '' if (step == 'preprocessing' || step == '') step = 'mapping' -if (!SarekUtils.checkParameterExistence(step, stepList)) exit 1, 'Unknown step, see --help for more information' +if (!checkParameterExistence(step, stepList)) exit 1, 'Unknown step, see --help for more information' if (step.contains(',')) exit 1, 'You can choose only one step, see --help for more information' if (step == 'mapping' && !checkExactlyOne([params.test, params.sample, params.sampleDir])) exit 1, 'Please define which samples to work on by providing exactly one of the --test, --sample or --sampleDir options' tools = params.tools ? 
params.tools.split(',').collect{it.trim().toLowerCase()} : [] toolList = defineToolList() -if (!SarekUtils.checkParameterList(tools,toolList)) exit 1, 'Unknown tool(s), see --help for more information' +if (!checkParameterList(tools,toolList)) exit 1, 'Unknown tool(s), see --help for more information' referenceMap = defineReferenceMap(step, tools) -if (!SarekUtils.checkReferenceMap(referenceMap)) exit 1, 'Missing Reference file(s), see --help for more information' +if (!checkReferenceMap(referenceMap)) exit 1, 'Missing Reference file(s), see --help for more information' // Has the run name been specified by the user? // this has the bonus effect of catching both -name and --name @@ -363,15 +363,50 @@ def checkHostname(){ ======================================================================================== */ +def checkExactlyOne(list) { + def n = 0 + list.each{n += it ? 1 : 0} + return n == 1 +} + +// Check parameter existence +def checkParameterExistence(it, list) { + if (!list.contains(it)) { + println("Unknown parameter: ${it}") + return false + } + return true +} + +// Compare each parameter with a list of parameters +def checkParameterList(list, realList) { + return list.every{ checkParameterExistence(it, realList) } +} + def checkParamReturnFile(item) { params."${item}" = params.genomes[params.genome]."${item}" return file(params."${item}") } -def checkExactlyOne(list) { - def n = 0 - list.each{n += it ? 
1 : 0} - return n == 1 +// Loop through all the references files to check their existence +def checkRefExistence(referenceFile, fileToCheck) { + if (fileToCheck instanceof List) return fileToCheck.every{ checkRefExistence(referenceFile, it) } + def f = file(fileToCheck) + // this is an expanded wildcard: we can assume all files exist + if (f instanceof List && f.size() > 0) return true + else if (!f.exists()) { + println "Missing references: ${referenceFile} ${fileToCheck}" + return false + } + return true +} + +// Loop through all the references files to check their existence +def checkReferenceMap(referenceMap) { + referenceMap.every { + referenceFile, fileToCheck -> + checkRefExistence(referenceFile, fileToCheck) + } } def defineReferenceMap(step, tools) { From 6cc6efe8e829fefebdf7adfb54c2c96133a19929 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Thu, 2 May 2019 10:46:41 +0200 Subject: [PATCH 11/28] add References to .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 5b54e3e6c2..6d5ff0b631 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ .nextflow* work/ data/ +References/ results/ .DS_Store tests/test_data From e2a44ddd7498747cc2655d657844cfc38d0fbd77 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Thu, 2 May 2019 10:48:14 +0200 Subject: [PATCH 12/28] lower case --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 6d5ff0b631..96675d0154 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,7 @@ .nextflow* work/ data/ -References/ +references/ results/ .DS_Store tests/test_data From 1f0cce6b5dd9fe4173f26e7a14043fe018821a3d Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Thu, 2 May 2019 11:51:25 +0200 Subject: [PATCH 13/28] remove picard --- bin/scrape_software_versions.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/bin/scrape_software_versions.py b/bin/scrape_software_versions.py index 599043c4ae..5667cd24c8 100755 --- 
a/bin/scrape_software_versions.py +++ b/bin/scrape_software_versions.py @@ -17,7 +17,6 @@ 'MultiQC': ['v_multiqc.txt', r"multiqc, version (\S+)"], 'Nextflow': ['v_nextflow.txt', r"(\S+)"], 'nf-core/sarek': ['v_pipeline.txt', r"(\S+)"], - 'Picard': ['v_picard.txt', r"Picard version:(\d\.\d\.\d+)"], 'Qualimap': ['v_qualimap.txt', r"QualiMap v.(\S+)"], 'R': ['v_r.txt', r"R version (\S+)"], 'samtools': ['v_samtools.txt', r"samtools (\S+)"], @@ -39,7 +38,6 @@ results['htslib'] = 'N/A' results['Manta'] = 'N/A' results['MultiQC'] = 'N/A' -results['Picard'] = 'N/A' results['Qualimap'] = 'N/A' results['R'] = 'N/A' results['samtools'] = 'N/A' From 8627fc0861e572630897e6c946262f93d98b170f Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Thu, 2 May 2019 11:52:22 +0200 Subject: [PATCH 14/28] improve software versions gathering --- main.nf | 181 ++++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 164 insertions(+), 17 deletions(-) diff --git a/main.nf b/main.nf index c39a1353ea..2060a2dec6 100644 --- a/main.nf +++ b/main.nf @@ -3,6 +3,8 @@ ======================================================================================== nf-core/sarek ======================================================================================== +New Germline (+ Somatic) Analysis Workflow. Started March 2016. +---------------------------------------------------------------------------------------- nf-core/sarek Analysis Pipeline. @Homepage https://sarek.scilifelab.se/ @@ -60,12 +62,17 @@ if (params.genomes && params.genome && !params.genomes.containsKey(params.genome exit 1, "The provided genome '${params.genome}' is not available in the iGenomes file. Currently the available genomes are ${params.genomes.keySet().join(", ")}" } +params.step = 'mapping' +params.test = false +params.sampleDir = false +params.tools = false + stepList = defineStepList() step = params.step ? 
params.step.toLowerCase() : '' if (step == 'preprocessing' || step == '') step = 'mapping' if (!checkParameterExistence(step, stepList)) exit 1, 'Unknown step, see --help for more information' if (step.contains(',')) exit 1, 'You can choose only one step, see --help for more information' -if (step == 'mapping' && !checkExactlyOne([params.test, params.sample, params.sampleDir])) +if (step == 'mapping' && ([params.test, params.sample, params.sampleDir].size == 1)) exit 1, 'Please define which samples to work on by providing exactly one of the --test, --sample or --sampleDir options' tools = params.tools ? params.tools.split(',').collect{it.trim().toLowerCase()} : [] @@ -112,6 +119,33 @@ ch_output_docs = Channel.fromPath("$baseDir/docs/output.md") if (params.test || step != 'mapping') tsvPath = tsvPaths[step] } + // Set up the inputFiles and bamFiles channels. One of them will remain empty + inputFiles = Channel.empty() + bamFiles = Channel.empty() + if (tsvPath) { + tsvFile = file(tsvPath) + switch (step) { + case 'mapping': inputFiles = extractSample(tsvFile); break + case 'recalibrate': bamFiles = extractRecal(tsvFile); break + default: exit 1, "Unknown step ${step}" + } + } else if (params.sampleDir) { + if (step != 'mapping') exit 1, '--sampleDir does not support steps other than "mapping"' + inputFiles = extractFastqFromDir(params.sampleDir) + (inputFiles, fastqTmp) = inputFiles.into(2) + fastqTmp.toList().subscribe onNext: { + if (it.size() == 0) { + exit 1, "No FASTQ files found in --sampleDir directory '${params.sampleDir}'" + } + } + tsvFile = params.sampleDir // used in the reports + } else exit 1, 'No sample were defined, see --help' + + if (step == 'recalibrate') (patientGenders, bamFiles) = extractGenders(bamFiles) + else (patientGenders, inputFiles) = extractGenders(inputFiles) + + + // Header log info log.info nfcoreHeader() def summary = [:] @@ -164,20 +198,17 @@ ${summary.collect { k,v -> "
$k
${v ?: ' - if (filename.indexOf(".csv") > 0) filename - else null - } + publishDir path:"${params.outdir}/pipeline_info", mode: params.publishDirMode output: file 'software_versions_mqc.yaml' into software_versions_yaml - file "software_versions.csv" script: """ + alleleCounter --version &> v_allelecount.txt || true bcftools version > v_bcftools.txt 2>&1 || true bwa &> v_bwa.txt 2>&1 || true + cat ${baseDir}/scripts/ascat.R | grep "ASCAT version" &> v_ascat.txt || true configManta.py --version > v_manta.txt 2>&1 || true configureStrelkaGermlineWorkflow.py --version > v_strelka.txt 2>&1 || true echo "${workflow.manifest.version}" &> v_pipeline.txt 2>&1 || true @@ -188,6 +219,7 @@ process get_software_versions { gatk ApplyBQSR --help 2>&1 | grep Version: > v_gatk.txt 2>&1 || true multiqc --version &> v_multiqc.txt 2>&1 || true qualimap --version &> v_qualimap.txt 2>&1 || true + R --version &> v_r.txt || true samtools --version &> v_samtools.txt 2>&1 || true vcftools --version &> v_vcftools.txt 2>&1 || true vep --help &> v_vep.txt 2>&1 || true @@ -196,6 +228,9 @@ process get_software_versions { """ } + + + /* * Completion e-mail notification */ @@ -324,12 +359,12 @@ def nfcoreHeader(){ ${c_blue} |\\ | |__ __ / ` / \\ |__) |__ ${c_yellow}} {${c_reset} ${c_blue} | \\| | \\__, \\__/ | \\ |___ ${c_green}\\`-._,-`-,${c_reset} ${c_green}`._,._,\'${c_reset} - ____ _____ _ - .' _ `. / ____| | | - / |\\`-_ \\ | (___ ___ _ __ __ | | __ - | | \\ `-| \\___ \\/__ \\| ´__/ _\\| |/ / - \\ | \\ / ____) | __ | | | __| < - `|____\\' |_____/\\____|_| \\__/|_|\\_\\ + ${c_black} ____ ${c_blue} _____ _ ${c_reset} + ${c_black} .' ${c_green}_${c_black} `. 
${c_blue} / ____| | | ${c_reset} + ${c_black} / ${c_green}|\\${c_white}`-_${c_black} \\ ${c_blue} | (___ ___ _ __ __ | | __ ${c_reset} + ${c_black} | ${c_green}| \\ ${c_white}`-${c_black}| ${c_blue} \\___ \\/__ \\| ´__/ _\\| |/ / ${c_reset} + ${c_black} \\ ${c_green}| \\ ${c_black}/ ${c_blue} ____) | __ | | | __| < ${c_reset} + ${c_black} `${c_green}|${c_black}____${c_green}\\${c_black}' ${c_blue} |_____/\\____|_| \\__/|_|\\_\\ ${c_reset} ${c_purple} nf-core/sarek v${workflow.manifest.version}${c_reset} ${c_dim}----------------------------------------------------${c_reset} @@ -363,10 +398,10 @@ def checkHostname(){ ======================================================================================== */ -def checkExactlyOne(list) { - def n = 0 - list.each{n += it ? 1 : 0} - return n == 1 +// Check if a row has the expected number of item +def checkNumberOfItem(row, number) { + if (row.size() != number) exit 1, "Malformed row in TSV file: ${row}, see --help for more information" + return true } // Check parameter existence @@ -383,6 +418,7 @@ def checkParameterList(list, realList) { return list.every{ checkParameterExistence(it, realList) } } +// Check if params.item exists and return params.genomes[params.genome].item otherwise def checkParamReturnFile(item) { params."${item}" = params.genomes[params.genome]."${item}" return file(params."${item}") @@ -409,6 +445,7 @@ def checkReferenceMap(referenceMap) { } } +// Define map of reference depending of tools and step def defineReferenceMap(step, tools) { def referenceMap = [ @@ -439,6 +476,7 @@ def defineReferenceMap(step, tools) { return referenceMap } +// Define list of available step def defineStepList() { return [ 'mapping', @@ -448,6 +486,7 @@ def defineStepList() { ] } +// Define list of available tools def defineToolList() { return [ 'ascat', @@ -458,3 +497,111 @@ def defineToolList() { 'strelka' ] } + + // Create a channel of germline FASTQs from a directory pattern: "my_samples/*/" + // All FASTQ files in 
subdirectories are collected and emitted; + // they must have _R1_ and _R2_ in their names. +def extractFastqFromDir(pattern) { + def fastq = Channel.create() + // a temporary channel does all the work + Channel + .fromPath(pattern, type: 'dir') + .ifEmpty { error "No directories found matching pattern '${pattern}'" } + .subscribe onNext: { sampleDir -> + // the last name of the sampleDir is assumed to be a unique sample id + sampleId = sampleDir.getFileName().toString() + + for (path1 in file("${sampleDir}/**_R1_*.fastq.gz")) { + assert path1.getName().contains('_R1_') + path2 = file(path1.toString().replace('_R1_', '_R2_')) + if (!path2.exists()) error "Path '${path2}' not found" + (flowcell, lane) = flowcellLaneFromFastq(path1) + patient = sampleId + gender = 'ZZ' // unused + status = 0 // normal (not tumor) + rgId = "${flowcell}.${sampleId}.${lane}" + result = [patient, gender, status, sampleId, rgId, path1, path2] + fastq.bind(result) + } + }, onComplete: { fastq.close() } + fastq +} + +// Extract gender from Channel as it's only used for CNVs +def extractGenders(channel) { + def genders = [:] + channel = channel.map{ it -> + def idPatient = it[0] + def gender = it[1] + genders[idPatient] = gender + [idPatient] + it[2..-1] + } + [genders, channel] +} + +// Channeling the TSV file containing FASTQ or BAM +// Format is: "subject gender status sample lane fastq1 fastq2" +// or: "subject gender status sample lane bam" +def extractSample(tsvFile) { + Channel.from(tsvFile) + .splitCsv(sep: '\t') + .map { row -> + def idPatient = row[0] + def gender = row[1] + def status = returnStatus(row[2].toInteger()) + def idSample = row[3] + def idRun = row[4] + def file1 = returnFile(row[5]) + def file2 = file("null") + if (hasExtension(file1,"fastq.gz") || hasExtension(file1,"fq.gz")) { + checkNumberOfItem(row, 7) + file2 = returnFile(row[6]) + if (!hasExtension(file2,"fastq.gz") && !hasExtension(file2,"fq.gz")) exit 1, "File: ${file2} has the wrong extension. 
See --help for more information" + } + else if (hasExtension(file1,"bam")) checkNumberOfItem(row, 6) + else "No recognisable extention for input file: ${file1}" + + [idPatient, gender, status, idSample, idRun, file1, file2] + } +} + +// Channeling the TSV file containing Recalibration Tables. +// Format is: "subject gender status sample bam bai recalTables" +def extractRecal(tsvFile) { + Channel.from(tsvFile) + .splitCsv(sep: '\t') + .map { row -> + checkNumberOfItem(row, 7) + def idPatient = row[0] + def gender = row[1] + def status = returnStatus(row[2].toInteger()) + def idSample = row[3] + def bamFile = returnFile(row[4]) + def baiFile = returnFile(row[5]) + def recalTable = returnFile(row[6]) + + if (!hasExtension(bamFile,"bam")) exit 1, "File: ${bamFile} has the wrong extension. See --help for more information" + if (!hasExtension(baiFile,"bai")) exit 1, "File: ${baiFile} has the wrong extension. See --help for more information" + if (!hasExtension(recalTable,"recal.table")) exit 1, "File: ${recalTable} has the wrong extension. 
See --help for more information" + + [ idPatient, gender, status, idSample, bamFile, baiFile, recalTable ] + } +} + +// Check file extension +def hasExtension(it, extension) { + it.toString().toLowerCase().endsWith(extension.toLowerCase()) +} + +// Return file if it exists +def returnFile(it) { + if (!file(it).exists()) exit 1, "Missing file in TSV file: ${it}, see --help for more information" + return file(it) +} + +// Return status [0,1] +// 0 == Normal, 1 == Tumor +def returnStatus(it) { + if (!(it in [0, 1])) exit 1, "Status is not recognized in TSV file: ${it}, see --help for more information" + return it +} From 9bc65d1815265cb75565218196e84d6117021e4b Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Thu, 2 May 2019 11:52:52 +0200 Subject: [PATCH 15/28] fix docker owner --- nextflow.config | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 6e7ff7b57d..a43d967601 100644 --- a/nextflow.config +++ b/nextflow.config @@ -58,7 +58,13 @@ profiles { awsbatch { includeConfig 'conf/awsbatch.config' } conda { process.conda = "$baseDir/environment.yml" } debug { process.beforeScript = 'echo $HOSTNAME' } - docker { docker.enabled = true } + docker { + docker { + enabled = true + fixOwnership = true + runOptions = "-u \$(id -u):\$(id -g)" + } + } singularity { singularity.enabled = true } test { includeConfig 'conf/test.config' } } From 806cab59c8d7a1e19c7b8e61f3b221d9f83576d2 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Thu, 2 May 2019 13:18:42 +0200 Subject: [PATCH 16/28] update tests --- .travis.yml | 6 +++--- Jenkinsfile | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.travis.yml b/.travis.yml index 03cf98bb46..7941be71fb 100644 --- a/.travis.yml +++ b/.travis.yml @@ -35,7 +35,7 @@ install: jobs: include: - stage: built - script: git clone --single-branch --branch sarek https://github.com/nf-core/test-datasets.git test-data - script: nextflow run ${TRAVIS_BUILD_DIR}/build.nf -profile 
docker --genome smallGRCh37 --refdir test-data/reference --outdir References + script: git clone --single-branch --branch sarek https://github.com/nf-core/test-datasets.git data + script: nextflow run ${TRAVIS_BUILD_DIR}/build.nf -profile docker --genome smallGRCh37 --refdir data/reference --outdir references --publishDirMode link - stage: test - script: nextflow run ${TRAVIS_BUILD_DIR}/main.nf -profile docker --help + script: nextflow run ${TRAVIS_BUILD_DIR}/main.nf -profile docker --genome smallGRCh37 --igenomes_base references --sample data/testdata/tsv/tiny-multiple.tsv --publishDirMode link diff --git a/Jenkinsfile b/Jenkinsfile index b54e5c7deb..ed45e1923b 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -13,13 +13,13 @@ pipeline { } stage('Build') { steps { - sh "git clone --single-branch --branch sarek https://github.com/nf-core/test-datasets.git test-data" - sh "nextflow run build.nf -profile docker --genome smallGRCh37 --refdir test-data/reference --outdir References" + sh "git clone --single-branch --branch sarek https://github.com/nf-core/test-datasets.git data" + sh "nextflow run build.nf -profile docker --genome smallGRCh37 --refdir data/reference --outdir references --publishDirMode link" } } stage('Test') { steps { - sh "nextflow run main.nf -profile docker --help" + sh "nextflow run main.nf -profile docker --genome smallGRCh37 --igenomes_base references --sample data/testdata/tsv/tiny-multiple.tsv --publishDirMode link" } } } From a58eaa255f59b1321b8eb03ae299350103904054 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Thu, 2 May 2019 13:19:04 +0200 Subject: [PATCH 17/28] sort params --- conf/base.config | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/conf/base.config b/conf/base.config index bb54f1e010..14c8f70aef 100644 --- a/conf/base.config +++ b/conf/base.config @@ -17,8 +17,8 @@ process { time = { check_max( 2.h * task.attempt, 'time' ) } errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 
'retry' : 'finish' } - maxRetries = 1 maxErrors = '-1' + maxRetries = 1 // Process-specific resource requirements // TODO nf-core: Customise requirements for specific processes. @@ -27,8 +27,9 @@ process { params { // Defaults only, expecting to be overwritten - max_memory = 128.GB + igenomes_base = 's3://ngi-igenomes/igenomes/' + markdup_java_options = '"-Xms4000m -Xmx7g"' //Established values for markDuplicate memory consumption, see issue PR #689 for details max_cpus = 16 + max_memory = 128.GB max_time = 240.h - igenomes_base = 's3://ngi-igenomes/igenomes/' } From b0fb4b93531fbef8cbe7207939a76f7b50ff9a34 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Thu, 2 May 2019 13:19:40 +0200 Subject: [PATCH 18/28] add preprocessing --- main.nf | 359 +++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 353 insertions(+), 6 deletions(-) diff --git a/main.nf b/main.nf index 2060a2dec6..454d301de0 100644 --- a/main.nf +++ b/main.nf @@ -62,9 +62,12 @@ if (params.genomes && params.genome && !params.genomes.containsKey(params.genome exit 1, "The provided genome '${params.genome}' is not available in the iGenomes file. 
Currently the available genomes are ${params.genomes.keySet().join(", ")}" } +params.noReports = false +params.sampleDir = false +params.sequencing_center = null params.step = 'mapping' +params.targetBED = null params.test = false -params.sampleDir = false params.tools = false stepList = defineStepList() @@ -113,8 +116,8 @@ ch_output_docs = Channel.fromPath("$baseDir/docs/output.md") if (!params.sample && !params.sampleDir) { tsvPaths = [ 'mapping': "${workflow.projectDir}/Sarek-data/testdata/tsv/tiny.tsv", - 'recalibrate': "${params.outDir}/Preprocessing/DuplicateMarked/duplicateMarked.tsv", - 'variantcalling': "${params.outDir}/Preprocessing/Recalibrated/recalibrated.tsv" + 'recalibrate': "${params.outdir}/Preprocessing/DuplicateMarked/duplicateMarked.tsv", + 'variantcalling': "${params.outdir}/Preprocessing/Recalibrated/recalibrated.tsv" ] if (params.test || step != 'mapping') tsvPath = tsvPaths[step] } @@ -144,8 +147,6 @@ ch_output_docs = Channel.fromPath("$baseDir/docs/output.md") if (step == 'recalibrate') (patientGenders, bamFiles) = extractGenders(bamFiles) else (patientGenders, inputFiles) = extractGenders(inputFiles) - - // Header log info log.info nfcoreHeader() def summary = [:] @@ -198,7 +199,7 @@ ${summary.collect { k,v -> "
$k
${v ?: ' ${idRun}.bam + """ + else if (hasExtension(inputFile1,"bam")) + // -K is an hidden option, used to fix the number of reads processed by bwa mem + // Chunk size can affect bwa results, if not specified, the number of threads can change + // which can give not deterministic result. + // cf https://github.com/CCDG/Pipeline-Standardization/blob/master/PipelineStandard.md + // and https://github.com/gatk-workflows/gatk4-data-processing/blob/8ffa26ff4580df4ac3a5aa9e272a4ff6bab44ba2/processing-for-variant-discovery-gatk4.b37.wgs.inputs.json#L29 + """ + gatk --java-options -Xmx${task.memory.toGiga()}g \ + SamToFastq \ + --INPUT=${inputFile1} \ + --FASTQ=/dev/stdout \ + --INTERLEAVE=true \ + --NON_PF=true \ + | \ + bwa mem -K 100000000 -p -R \"${readGroup}\" ${extra} -t ${task.cpus} -M ${genomeFile} \ + /dev/stdin - 2> >(tee ${inputFile1}.bwa.stderr.log >&2) \ + | \ + samtools sort --threads ${task.cpus} -m 2G - > ${idRun}.bam + """ +} + +mappedBam = mappedBam.dump(tag:'Mapped BAM') + +process RunBamQCmapped { + tag {idPatient + "-" + idSample} + + publishDir "${params.outdir}/Reports/${idSample}/bamQC", mode: params.publishDirMode + + input: + set idPatient, status, idSample, idRun, file(bam) from mappedBamForQC + file(targetBED) from Channel.value(params.targetBED ? file(params.targetBED) : "null") + + output: + file("${bam.baseName}") into bamQCmappedReport + + when: !params.noReports + + script: + use_bed = params.targetBED ? 
"-gff ${targetBED}" : '' + """ + qualimap --java-mem-size=${task.memory.toGiga()}G \ + bamqc \ + -bam ${bam} \ + --paint-chromosome-limits \ + --genome-gc-distr HUMAN \ + $use_bed \ + -nt ${task.cpus} \ + -skip-duplicated \ + --skip-dup-mode 0 \ + -outdir ${bam.baseName} \ + -outformat HTML + """ +} + +bamQCmappedReport.dump(tag:'BamQC BAM') + +// Sort bam whether they are standalone or should be merged + +singleBam = Channel.create() +groupedBam = Channel.create() +mappedBam.groupTuple(by:[0,1,2]) + .choice(singleBam, groupedBam) {it[3].size() > 1 ? 1 : 0} +singleBam = singleBam.map { + idPatient, status, idSample, idRun, bam -> + [idPatient, status, idSample, bam] +} +process MergeBams { + tag {idPatient + "-" + idSample} + + input: + set idPatient, status, idSample, idRun, file(bam) from groupedBam + + output: + set idPatient, status, idSample, file("${idSample}.bam") into mergedBam + + when: step == 'mapping' + + script: + """ + samtools merge --threads ${task.cpus} ${idSample}.bam ${bam} + """ +} + +singleBam = singleBam.dump(tag:'Single BAM') +mergedBam = mergedBam.dump(tag:'Merged BAM') +mergedBam = mergedBam.mix(singleBam) +mergedBam = mergedBam.dump(tag:'BAM for MD') + +process MarkDuplicates { + tag {idPatient + "-" + idSample} + + publishDir params.outdir, mode: params.publishDirMode, + saveAs: { + if (it == "${idSample}.bam.metrics") "Reports/${idSample}/MarkDuplicates/${it}" + else "Preprocessing/${idSample}/DuplicateMarked/${it}" + } + + input: + set idPatient, status, idSample, file("${idSample}.bam") from mergedBam + + output: + set idPatient, file("${idSample}_${status}.md.bam"), file("${idSample}_${status}.md.bai") into duplicateMarkedBams + set idPatient, status, idSample, val("${idSample}_${status}.md.bam"), val("${idSample}_${status}.md.bai") into markDuplicatesTSV + file ("${idSample}.bam.metrics") into markDuplicatesReport + + when: step == 'mapping' + + script: + markdup_java_options = task.memory.toGiga() > 8 ? 
params.markdup_java_options : "\"-Xms" + (task.memory.toGiga() / 2 ).trunc() + "g -Xmx" + (task.memory.toGiga() - 1) + "g\"" + """ + gatk --java-options ${markdup_java_options} \ + MarkDuplicates \ + --MAX_RECORDS_IN_RAM 50000 \ + --INPUT ${idSample}.bam \ + --METRICS_FILE ${idSample}.bam.metrics \ + --TMP_DIR . \ + --ASSUME_SORT_ORDER coordinate \ + --CREATE_INDEX true \ + --OUTPUT ${idSample}_${status}.md.bam + """ +} + +// Creating a TSV file to restart from this step +markDuplicatesTSV.map { idPatient, status, idSample, bam, bai -> + gender = patientGenders[idPatient] + "${idPatient}\t${gender}\t${status}\t${idSample}\t${params.outdir}/Preprocessing/${idSample}/DuplicateMarked/${bam}\t${params.outdir}/Preprocessing/${idSample}/DuplicateMarked/${bai}\n" +}.collectFile( + name: 'duplicateMarked.tsv', sort: true, storeDir: "${params.outdir}/Preprocessing/DuplicateMarked" +) + +duplicateMarkedBams = duplicateMarkedBams.map { + idPatient, bam, bai -> + tag = bam.baseName.tokenize('.')[0] + status = tag[-1..-1].toInteger() + idSample = tag.take(tag.length()-2) + [idPatient, status, idSample, bam, bai] +} + +duplicateMarkedBams = duplicateMarkedBams.dump(tag:'MD BAM') + +(mdBam, mdBamToJoin) = duplicateMarkedBams.into(2) + +process CreateRecalibrationTable { + tag {idPatient + "-" + idSample} + + publishDir "${params.outdir}/Preprocessing/${idSample}/DuplicateMarked", mode: params.publishDirMode, overwrite: false + + input: + set idPatient, status, idSample, file(bam), file(bai) from mdBam // realignedBam + set file(genomeFile), file(genomeIndex), file(genomeDict), file(dbsnp), file(dbsnpIndex), file(knownIndels), file(knownIndelsIndex), file(intervals) from Channel.value([ + referenceMap.genomeFile, + referenceMap.genomeIndex, + referenceMap.genomeDict, + referenceMap.dbsnp, + referenceMap.dbsnpIndex, + referenceMap.knownIndels, + referenceMap.knownIndelsIndex, + referenceMap.intervals, + ]) + + output: + set idPatient, status, idSample, 
file("${idSample}.recal.table") into recalibrationTable + set idPatient, status, idSample, val("${idSample}_${status}.md.bam"), val("${idSample}_${status}.md.bai"), val("${idSample}.recal.table") into recalibrationTableTSV + + when: step == 'mapping' + + script: + known = knownIndels.collect{ "--known-sites ${it}" }.join(' ') + """ + gatk --java-options -Xmx${task.memory.toGiga()}g \ + BaseRecalibrator \ + --input ${bam} \ + --output ${idSample}.recal.table \ + --tmp-dir /tmp \ + -R ${genomeFile} \ + -L ${intervals} \ + --known-sites ${dbsnp} \ + ${known} \ + --verbosity INFO + """ +} + +// Create a TSV file to restart from this step +recalibrationTableTSV.map { idPatient, status, idSample, bam, bai, recalTable -> + gender = patientGenders[idPatient] + "${idPatient}\t${gender}\t${status}\t${idSample}\t${params.outdir}/Preprocessing/${idSample}/DuplicateMarked/${bam}\t${params.outdir}/Preprocessing/${idSample}/DuplicateMarked/${bai}\t${params.outdir}/Preprocessing/${idSample}/DuplicateMarked/${recalTable}\n" +}.collectFile( + name: 'duplicateMarked.tsv', sort: true, storeDir: "${params.outdir}/Preprocessing/DuplicateMarked" +) + +recalibrationTable = mdBamToJoin.join(recalibrationTable, by:[0,1,2]) + +if (step == 'recalibrate') recalibrationTable = bamFiles + +recalibrationTable = recalibrationTable.dump(tag:'recal.table') + +process RecalibrateBam { + tag {idPatient + "-" + idSample} + + publishDir "${params.outdir}/Preprocessing/${idSample}/Recalibrated", mode: params.publishDirMode + + input: + set idPatient, status, idSample, file(bam), file(bai), file(recalibrationReport) from recalibrationTable + set file(genomeFile), file(genomeIndex), file(genomeDict), file(intervals) from Channel.value([ + referenceMap.genomeFile, + referenceMap.genomeIndex, + referenceMap.genomeDict, + referenceMap.intervals, + ]) + + output: + set idPatient, status, idSample, file("${idSample}.recal.bam"), file("${idSample}.recal.bai") into recalibratedBam, recalibratedBamForStats + set 
idPatient, status, idSample, val("${idSample}.recal.bam"), val("${idSample}.recal.bai") into recalibratedBamTSV + + script: + """ + gatk --java-options -Xmx${task.memory.toGiga()}g \ + ApplyBQSR \ + -R ${genomeFile} \ + --input ${bam} \ + --output ${idSample}.recal.bam \ + -L ${intervals} \ + --create-output-bam-index true \ + --bqsr-recal-file ${recalibrationReport} + """ +} +// Creating a TSV file to restart from this step +recalibratedBamTSV.map { idPatient, status, idSample, bam, bai -> + gender = patientGenders[idPatient] + "${idPatient}\t${gender}\t${status}\t${idSample}\t${params.outdir}/Preprocessing/${idSample}/Recalibrated/${bam}\t${params.outdir}/Preprocessing/${idSample}/Recalibrated/${bai}\n" +}.collectFile( + name: 'recalibrated.tsv', sort: true, storeDir: "${params.outdir}/Preprocessing/Recalibrated" +) + +recalibratedBam.dump(tag:'recal.bam') + +// Remove recalTable from Channels to match inputs for Process to avoid: +// WARN: Input tuple does not match input set cardinality declared by process... 
+(bamForBamQC, bamForSamToolsStats) = recalibratedBamForStats.map{ it[0..4] }.into(2) + +process RunSamtoolsStats { + tag {idPatient + "-" + idSample} + + publishDir "${params.outdir}/Reports/${idSample}/SamToolsStats", mode: params.publishDirMode + + input: + set idPatient, status, idSample, file(bam), file(bai) from bamForSamToolsStats + + output: + file ("${bam}.samtools.stats.out") into samtoolsStatsReport + + when: !params.noReports + + script: + """ + samtools stats ${bam} > ${bam}.samtools.stats.out + """ +} + +samtoolsStatsReport.dump(tag:'SAMTools') + +process RunBamQCrecalibrated { + tag {idPatient + "-" + idSample} + + publishDir "${params.outdir}/Reports/${idSample}/bamQC", mode: params.publishDirMode + + input: + set idPatient, status, idSample, file(bam), file(bai) from bamForBamQC + + output: + file("${bam.baseName}") into bamQCrecalibratedReport + + when: !params.noReports + + script: + """ + qualimap --java-mem-size=${task.memory.toGiga()}G \ + bamqc \ + -bam ${bam} \ + --paint-chromosome-limits \ + --genome-gc-distr HUMAN \ + -nt ${task.cpus} \ + -skip-duplicated \ + --skip-dup-mode 0 \ + -outdir ${bam.baseName} \ + -outformat HTML + """ +} +bamQCrecalibratedReport.dump(tag:'BamQC') /* * Completion e-mail notification From ba09b0acd7ea2e77e056f5f0999091f936fe37e3 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Thu, 2 May 2019 13:21:47 +0200 Subject: [PATCH 19/28] improve TSV localisation --- main.nf | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/main.nf b/main.nf index 454d301de0..83217d7ce3 100644 --- a/main.nf +++ b/main.nf @@ -116,8 +116,8 @@ ch_output_docs = Channel.fromPath("$baseDir/docs/output.md") if (!params.sample && !params.sampleDir) { tsvPaths = [ 'mapping': "${workflow.projectDir}/Sarek-data/testdata/tsv/tiny.tsv", - 'recalibrate': "${params.outdir}/Preprocessing/DuplicateMarked/duplicateMarked.tsv", - 'variantcalling': "${params.outdir}/Preprocessing/Recalibrated/recalibrated.tsv" + 'recalibrate': 
"${params.outdir}/Preprocessing/TSV/duplicateMarked.tsv", + 'variantcalling': "${params.outdir}/Preprocessing/TSV/recalibrated.tsv" ] if (params.test || step != 'mapping') tsvPath = tsvPaths[step] } @@ -414,7 +414,7 @@ markDuplicatesTSV.map { idPatient, status, idSample, bam, bai -> gender = patientGenders[idPatient] "${idPatient}\t${gender}\t${status}\t${idSample}\t${params.outdir}/Preprocessing/${idSample}/DuplicateMarked/${bam}\t${params.outdir}/Preprocessing/${idSample}/DuplicateMarked/${bai}\n" }.collectFile( - name: 'duplicateMarked.tsv', sort: true, storeDir: "${params.outdir}/Preprocessing/DuplicateMarked" + name: 'duplicateMarked.tsv', sort: true, storeDir: "${params.outdir}/Preprocessing/TSV" ) duplicateMarkedBams = duplicateMarkedBams.map { @@ -474,7 +474,7 @@ recalibrationTableTSV.map { idPatient, status, idSample, bam, bai, recalTable -> gender = patientGenders[idPatient] "${idPatient}\t${gender}\t${status}\t${idSample}\t${params.outdir}/Preprocessing/${idSample}/DuplicateMarked/${bam}\t${params.outdir}/Preprocessing/${idSample}/DuplicateMarked/${bai}\t${params.outdir}/Preprocessing/${idSample}/DuplicateMarked/${recalTable}\n" }.collectFile( - name: 'duplicateMarked.tsv', sort: true, storeDir: "${params.outdir}/Preprocessing/DuplicateMarked" + name: 'duplicateMarked.tsv', sort: true, storeDir: "${params.outdir}/Preprocessing/TSV" ) recalibrationTable = mdBamToJoin.join(recalibrationTable, by:[0,1,2]) @@ -518,7 +518,7 @@ recalibratedBamTSV.map { idPatient, status, idSample, bam, bai -> gender = patientGenders[idPatient] "${idPatient}\t${gender}\t${status}\t${idSample}\t${params.outdir}/Preprocessing/${idSample}/Recalibrated/${bam}\t${params.outdir}/Preprocessing/${idSample}/Recalibrated/${bai}\n" }.collectFile( - name: 'recalibrated.tsv', sort: true, storeDir: "${params.outdir}/Preprocessing/Recalibrated" + name: 'recalibrated.tsv', sort: true, storeDir: "${params.outdir}/Preprocessing/TSV" ) recalibratedBam.dump(tag:'recal.bam') From 
3d8208abca5959b57c8f082658605d58dd1629b5 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Thu, 2 May 2019 14:26:17 +0200 Subject: [PATCH 20/28] improve multiple TSV --- main.nf | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/main.nf b/main.nf index 83217d7ce3..4ac7662e73 100644 --- a/main.nf +++ b/main.nf @@ -389,7 +389,6 @@ process MarkDuplicates { output: set idPatient, file("${idSample}_${status}.md.bam"), file("${idSample}_${status}.md.bai") into duplicateMarkedBams - set idPatient, status, idSample, val("${idSample}_${status}.md.bam"), val("${idSample}_${status}.md.bai") into markDuplicatesTSV file ("${idSample}.bam.metrics") into markDuplicatesReport when: step == 'mapping' @@ -409,14 +408,6 @@ process MarkDuplicates { """ } -// Creating a TSV file to restart from this step -markDuplicatesTSV.map { idPatient, status, idSample, bam, bai -> - gender = patientGenders[idPatient] - "${idPatient}\t${gender}\t${status}\t${idSample}\t${params.outdir}/Preprocessing/${idSample}/DuplicateMarked/${bam}\t${params.outdir}/Preprocessing/${idSample}/DuplicateMarked/${bai}\n" -}.collectFile( - name: 'duplicateMarked.tsv', sort: true, storeDir: "${params.outdir}/Preprocessing/TSV" -) - duplicateMarkedBams = duplicateMarkedBams.map { idPatient, bam, bai -> tag = bam.baseName.tokenize('.')[0] @@ -469,7 +460,8 @@ process CreateRecalibrationTable { """ } -// Create a TSV file to restart from this step +(recalibrationTableTSV, recalibrationTableSampleTSV) = recalibrationTableTSV.into(2) +// Create TSV files to restart from this step recalibrationTableTSV.map { idPatient, status, idSample, bam, bai, recalTable -> gender = patientGenders[idPatient] "${idPatient}\t${gender}\t${status}\t${idSample}\t${params.outdir}/Preprocessing/${idSample}/DuplicateMarked/${bam}\t${params.outdir}/Preprocessing/${idSample}/DuplicateMarked/${bai}\t${params.outdir}/Preprocessing/${idSample}/DuplicateMarked/${recalTable}\n" @@ -477,6 +469,13 @@ 
recalibrationTableTSV.map { idPatient, status, idSample, bam, bai, recalTable -> name: 'duplicateMarked.tsv', sort: true, storeDir: "${params.outdir}/Preprocessing/TSV" ) +recalibrationTableSampleTSV + .collectFile(storeDir: "${params.outdir}/Preprocessing/TSV") { + idPatient, status, idSample, bam, bai, recalTable -> + gender = patientGenders[idPatient] + ["duplicateMarked_${idSample}.tsv", "${idPatient}\t${gender}\t${status}\t${idSample}\t${params.outdir}/Preprocessing/${idSample}/DuplicateMarked/${bam}\t${params.outdir}/Preprocessing/${idSample}/DuplicateMarked/${bai}\t${params.outdir}/Preprocessing/${idSample}/DuplicateMarked/${recalTable}\n"] +} + recalibrationTable = mdBamToJoin.join(recalibrationTable, by:[0,1,2]) if (step == 'recalibrate') recalibrationTable = bamFiles @@ -513,6 +512,9 @@ process RecalibrateBam { --bqsr-recal-file ${recalibrationReport} """ } + + +(recalibratedBamTSV, recalibratedBamSampleTSV) = recalibratedBamTSV.into(2) // Creating a TSV file to restart from this step recalibratedBamTSV.map { idPatient, status, idSample, bam, bai -> gender = patientGenders[idPatient] @@ -521,6 +523,13 @@ recalibratedBamTSV.map { idPatient, status, idSample, bam, bai -> name: 'recalibrated.tsv', sort: true, storeDir: "${params.outdir}/Preprocessing/TSV" ) +recalibratedBamSampleTSV + .collectFile(storeDir: "${params.outdir}/Preprocessing/TSV") { + idPatient, status, idSample, bam, bai -> + gender = patientGenders[idPatient] + ["recalibrated_${idSample}.tsv", "${idPatient}\t${gender}\t${status}\t${idSample}\t${params.outdir}/Preprocessing/${idSample}/Recalibrated/${bam}\t${params.outdir}/Preprocessing/${idSample}/Recalibrated/${bai}\n"] +} + recalibratedBam.dump(tag:'recal.bam') // Remove recalTable from Channels to match inputs for Process to avoid: From 0b777d458fce6bab060bb37f68bf3a972c15d443 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Thu, 2 May 2019 14:43:51 +0200 Subject: [PATCH 21/28] add parrallelized BaseRecalibrator --- main.nf | 110 
+++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 98 insertions(+), 12 deletions(-) diff --git a/main.nf b/main.nf index 4ac7662e73..8631369549 100644 --- a/main.nf +++ b/main.nf @@ -63,6 +63,7 @@ if (params.genomes && params.genome && !params.genomes.containsKey(params.genome } params.noReports = false +params.nucleotidesPerSecond = 1000.0 params.sampleDir = false params.sequencing_center = null params.step = 'mapping' @@ -420,14 +421,76 @@ duplicateMarkedBams = duplicateMarkedBams.dump(tag:'MD BAM') (mdBam, mdBamToJoin) = duplicateMarkedBams.into(2) +process CreateIntervalBeds { + tag {intervals.fileName} + + input: + file(intervals) from Channel.value(referenceMap.intervals) + + output: + file '*.bed' into bedIntervals mode flatten + + script: + // If the interval file is BED format, the fifth column is interpreted to + // contain runtime estimates, which is then used to combine short-running jobs + if (hasExtension(intervals,"bed")) + """ + awk -vFS="\t" '{ + t = \$5 # runtime estimate + if (t == "") { + # no runtime estimate in this row, assume default value + t = (\$3 - \$2) / ${params.nucleotidesPerSecond} + } + if (name == "" || (chunk > 600 && (chunk + t) > longest * 1.05)) { + # start a new chunk + name = sprintf("%s_%d-%d.bed", \$1, \$2+1, \$3) + chunk = 0 + longest = 0 + } + if (t > longest) + longest = t + chunk += t + print \$0 > name + }' ${intervals} + """ + else + """ + awk -vFS="[:-]" '{ + name = sprintf("%s_%d-%d", \$1, \$2, \$3); + printf("%s\\t%d\\t%d\\n", \$1, \$2-1, \$3) > name ".bed" + }' ${intervals} + """ +} + +bedIntervals = bedIntervals + .map { intervalFile -> + def duration = 0.0 + for (line in intervalFile.readLines()) { + final fields = line.split('\t') + if (fields.size() >= 5) duration += fields[4].toFloat() + else { + start = fields[1].toInteger() + end = fields[2].toInteger() + duration += (end - start) / params.nucleotidesPerSecond + } + } + [duration, intervalFile] + }.toSortedList({ a, b -> b[0] <=> a[0] 
}) + .flatten().collate(2) + .map{duration, intervalFile -> intervalFile} + +bedIntervals = bedIntervals.dump(tag:'bedintervals') + +bamForBaseRecalibrator = mdBam.combine(bedIntervals) + process CreateRecalibrationTable { - tag {idPatient + "-" + idSample} + tag {idPatient + "-" + idSample + "-" + intervalBed} publishDir "${params.outdir}/Preprocessing/${idSample}/DuplicateMarked", mode: params.publishDirMode, overwrite: false input: - set idPatient, status, idSample, file(bam), file(bai) from mdBam // realignedBam - set file(genomeFile), file(genomeIndex), file(genomeDict), file(dbsnp), file(dbsnpIndex), file(knownIndels), file(knownIndelsIndex), file(intervals) from Channel.value([ + set idPatient, status, idSample, file(bam), file(bai), file(intervalBed) from bamForBaseRecalibrator + set file(genomeFile), file(genomeIndex), file(genomeDict), file(dbsnp), file(dbsnpIndex), file(knownIndels), file(knownIndelsIndex) from Channel.value([ referenceMap.genomeFile, referenceMap.genomeIndex, referenceMap.genomeDict, @@ -435,32 +498,56 @@ process CreateRecalibrationTable { referenceMap.dbsnpIndex, referenceMap.knownIndels, referenceMap.knownIndelsIndex, - referenceMap.intervals, ]) output: - set idPatient, status, idSample, file("${idSample}.recal.table") into recalibrationTable - set idPatient, status, idSample, val("${idSample}_${status}.md.bam"), val("${idSample}_${status}.md.bai"), val("${idSample}.recal.table") into recalibrationTableTSV + set idPatient, status, idSample, file("${intervalBed.baseName}_${idSample}.recal.table") into recalIntervals when: step == 'mapping' script: known = knownIndels.collect{ "--known-sites ${it}" }.join(' ') + // --use-original-qualities ??? 
""" gatk --java-options -Xmx${task.memory.toGiga()}g \ BaseRecalibrator \ - --input ${bam} \ - --output ${idSample}.recal.table \ + -I ${bam} \ + -O ${intervalBed.baseName}_${idSample}.recal.table \ --tmp-dir /tmp \ -R ${genomeFile} \ - -L ${intervals} \ + -L ${intervalBed} \ --known-sites ${dbsnp} \ ${known} \ --verbosity INFO """ } -(recalibrationTableTSV, recalibrationTableSampleTSV) = recalibrationTableTSV.into(2) +recalIntervals = recalIntervals.groupTuple(by:[0,1,2]) + +process GatherBQSRReports { + tag {idPatient + "-" + idSample} + + publishDir "${params.outdir}/Preprocessing/${idSample}/DuplicateMarked", mode: params.publishDirMode, overwrite: false + + input: + set idPatient, status, idSample, file(recalTable) from recalIntervals + + output: + set idPatient, status, idSample, file("${idSample}.recal.table") into recalibrationTable + set idPatient, status, idSample, val("${idSample}_${status}.md.bam"), val("${idSample}_${status}.md.bai"), val("${idSample}.recal.table") into (recalibrationTableTSV, recalibrationTableSampleTSV) + + when: step == 'mapping' + + script: + recal = recalTable.collect{ "-I ${it}" }.join(' ') + """ + gatk --java-options -Xmx${task.memory.toGiga()}g \ + GatherBQSRReports \ + ${recal} \ + -O ${idSample}.recal.table \ + """ +} + // Create TSV files to restart from this step recalibrationTableTSV.map { idPatient, status, idSample, bam, bai, recalTable -> gender = patientGenders[idPatient] @@ -498,7 +585,7 @@ process RecalibrateBam { output: set idPatient, status, idSample, file("${idSample}.recal.bam"), file("${idSample}.recal.bai") into recalibratedBam, recalibratedBamForStats - set idPatient, status, idSample, val("${idSample}.recal.bam"), val("${idSample}.recal.bai") into recalibratedBamTSV + set idPatient, status, idSample, val("${idSample}.recal.bam"), val("${idSample}.recal.bai") into (recalibratedBamTSV, recalibratedBamSampleTSV) script: """ @@ -514,7 +601,6 @@ process RecalibrateBam { } -(recalibratedBamTSV, 
recalibratedBamSampleTSV) = recalibratedBamTSV.into(2) // Creating a TSV file to restart from this step recalibratedBamTSV.map { idPatient, status, idSample, bam, bai -> gender = patientGenders[idPatient] From 76bbfaf75c5e017013418f1d67a80b83887180ce Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Thu, 2 May 2019 14:49:35 +0200 Subject: [PATCH 22/28] smaller test on travisCI --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 7941be71fb..3c1bfc69a3 100644 --- a/.travis.yml +++ b/.travis.yml @@ -38,4 +38,4 @@ jobs: script: git clone --single-branch --branch sarek https://github.com/nf-core/test-datasets.git data script: nextflow run ${TRAVIS_BUILD_DIR}/build.nf -profile docker --genome smallGRCh37 --refdir data/reference --outdir references --publishDirMode link - stage: test - script: nextflow run ${TRAVIS_BUILD_DIR}/main.nf -profile docker --genome smallGRCh37 --igenomes_base references --sample data/testdata/tsv/tiny-multiple.tsv --publishDirMode link + script: nextflow run ${TRAVIS_BUILD_DIR}/main.nf -profile docker --genome smallGRCh37 --igenomes_base references --sampleDir data/testdata/tiny/normal --publishDirMode link From aa8d70fdff6b015fefaa4900bbf148e304561bcb Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Thu, 2 May 2019 14:54:58 +0200 Subject: [PATCH 23/28] try to fix path to data --- .travis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 3c1bfc69a3..cded9b6e5e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -35,7 +35,7 @@ install: jobs: include: - stage: built - script: git clone --single-branch --branch sarek https://github.com/nf-core/test-datasets.git data - script: nextflow run ${TRAVIS_BUILD_DIR}/build.nf -profile docker --genome smallGRCh37 --refdir data/reference --outdir references --publishDirMode link + script: git clone --single-branch --branch sarek https://github.com/nf-core/test-datasets.git ${TRAVIS_BUILD_DIR}/data + 
script: nextflow run ${TRAVIS_BUILD_DIR}/build.nf -profile docker --genome smallGRCh37 --refdir ${TRAVIS_BUILD_DIR}/data/reference --outdir ${TRAVIS_BUILD_DIR}/references --publishDirMode link - stage: test - script: nextflow run ${TRAVIS_BUILD_DIR}/main.nf -profile docker --genome smallGRCh37 --igenomes_base references --sampleDir data/testdata/tiny/normal --publishDirMode link + script: nextflow run ${TRAVIS_BUILD_DIR}/main.nf -profile docker --genome smallGRCh37 --igenomes_base ${TRAVIS_BUILD_DIR}/references --sampleDir ${TRAVIS_BUILD_DIR}/data/testdata/tiny/normal --publishDirMode link From af594adcdf38e82ab0dcd1b61481dab8ed1ae288 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Thu, 2 May 2019 15:05:31 +0200 Subject: [PATCH 24/28] include building reference when testing... --- .travis.yml | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/.travis.yml b/.travis.yml index cded9b6e5e..61560bab33 100644 --- a/.travis.yml +++ b/.travis.yml @@ -34,8 +34,7 @@ install: jobs: include: - - stage: built - script: git clone --single-branch --branch sarek https://github.com/nf-core/test-datasets.git ${TRAVIS_BUILD_DIR}/data - script: nextflow run ${TRAVIS_BUILD_DIR}/build.nf -profile docker --genome smallGRCh37 --refdir ${TRAVIS_BUILD_DIR}/data/reference --outdir ${TRAVIS_BUILD_DIR}/references --publishDirMode link - - stage: test - script: nextflow run ${TRAVIS_BUILD_DIR}/main.nf -profile docker --genome smallGRCh37 --igenomes_base ${TRAVIS_BUILD_DIR}/references --sampleDir ${TRAVIS_BUILD_DIR}/data/testdata/tiny/normal --publishDirMode link + - stage: germline + script: git clone --single-branch --branch sarek https://github.com/nf-core/test-datasets.git data + script: nextflow run ${TRAVIS_BUILD_DIR}/build.nf -profile docker --genome smallGRCh37 --refdir data/reference --outdir references --publishDirMode link + script: nextflow run ${TRAVIS_BUILD_DIR}/main.nf -profile docker --genome smallGRCh37 --igenomes_base references --sampleDir 
data/testdata/tiny/normal --publishDirMode link From f58d2555d6f1aeefbe3676fd02f8ed6366b1b175 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Thu, 2 May 2019 15:11:53 +0200 Subject: [PATCH 25/28] reorganize tests --- .travis.yml | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/.travis.yml b/.travis.yml index 61560bab33..20c7432682 100644 --- a/.travis.yml +++ b/.travis.yml @@ -32,9 +32,7 @@ install: # Reset - mkdir ${TRAVIS_BUILD_DIR}/tests && cd ${TRAVIS_BUILD_DIR}/tests -jobs: - include: - - stage: germline - script: git clone --single-branch --branch sarek https://github.com/nf-core/test-datasets.git data - script: nextflow run ${TRAVIS_BUILD_DIR}/build.nf -profile docker --genome smallGRCh37 --refdir data/reference --outdir references --publishDirMode link - script: nextflow run ${TRAVIS_BUILD_DIR}/main.nf -profile docker --genome smallGRCh37 --igenomes_base references --sampleDir data/testdata/tiny/normal --publishDirMode link +script: + - git clone --single-branch --branch sarek https://github.com/nf-core/test-datasets.git data + - nextflow run ${TRAVIS_BUILD_DIR}/build.nf -profile docker --genome smallGRCh37 --refdir data/reference --outdir references --publishDirMode link + - nextflow run ${TRAVIS_BUILD_DIR}/main.nf -profile docker --genome smallGRCh37 --igenomes_base references --sampleDir data/testdata/tiny/normal --publishDirMode link From 06782292ef92e3d987cd36c621ec15020df33473 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Thu, 2 May 2019 15:20:26 +0200 Subject: [PATCH 26/28] reduce memory and cpus --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 20c7432682..01d73056b2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -34,5 +34,5 @@ install: script: - git clone --single-branch --branch sarek https://github.com/nf-core/test-datasets.git data - - nextflow run ${TRAVIS_BUILD_DIR}/build.nf -profile docker --genome smallGRCh37 --refdir data/reference --outdir 
references --publishDirMode link - - nextflow run ${TRAVIS_BUILD_DIR}/main.nf -profile docker --genome smallGRCh37 --igenomes_base references --sampleDir data/testdata/tiny/normal --publishDirMode link + - nextflow run ${TRAVIS_BUILD_DIR}/build.nf -profile docker --genome smallGRCh37 --refdir data/reference --outdir references --publishDirMode link --max_memory 7.GB --max_cpus 2 + - nextflow run ${TRAVIS_BUILD_DIR}/main.nf -profile docker --genome smallGRCh37 --igenomes_base references --sampleDir data/testdata/tiny/normal --publishDirMode link --max_memory 7.GB --max_cpus 2 From 23a8cb072b101fe1e7d6066da3d815f0062c5b46 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Thu, 2 May 2019 15:30:57 +0200 Subject: [PATCH 27/28] add tests --- Jenkinsfile | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index ed45e1923b..7ba61ecfee 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -17,7 +17,12 @@ pipeline { sh "nextflow run build.nf -profile docker --genome smallGRCh37 --refdir data/reference --outdir references --publishDirMode link" } } - stage('Test') { + stage('SampleDir') { + steps { + sh "nextflow run main.nf -profile docker --genome smallGRCh37 --igenomes_base references --sampleDir data/testdata/tiny/normal --publishDirMode link" + } + } + stage('Multiple') { steps { sh "nextflow run main.nf -profile docker --genome smallGRCh37 --igenomes_base references --sample data/testdata/tsv/tiny-multiple.tsv --publishDirMode link" } From 5be316378b22285a11641b299997deebe669146c Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Thu, 2 May 2019 15:31:16 +0200 Subject: [PATCH 28/28] add flowcellLaneFromFastq function --- main.nf | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/main.nf b/main.nf index 8631369549..a554c41a65 100644 --- a/main.nf +++ b/main.nf @@ -65,6 +65,7 @@ if (params.genomes && params.genome && !params.genomes.containsKey(params.genome params.noReports = false params.nucleotidesPerSecond = 
1000.0 params.sampleDir = false +params.sample = false params.sequencing_center = null params.step = 'mapping' params.targetBED = null @@ -1030,6 +1031,34 @@ def extractRecal(tsvFile) { } } +// Parse first line of a FASTQ file, return the flowcell id and lane number. +def flowcellLaneFromFastq(path) { + // expected format: + // xx:yy:FLOWCELLID:LANE:... (seven fields) + // or + // FLOWCELLID:LANE:xx:... (five fields) + InputStream fileStream = new FileInputStream(path.toFile()) + InputStream gzipStream = new java.util.zip.GZIPInputStream(fileStream) + Reader decoder = new InputStreamReader(gzipStream, 'ASCII') + BufferedReader buffered = new BufferedReader(decoder) + def line = buffered.readLine() + assert line.startsWith('@') + line = line.substring(1) + def fields = line.split(' ')[0].split(':') + String fcid + int lane + if (fields.size() == 7) { + // CASAVA 1.8+ format + fcid = fields[2] + lane = fields[3].toInteger() + } + else if (fields.size() == 5) { + fcid = fields[0] + lane = fields[1].toInteger() + } + [fcid, lane] +} + // Check file extension def hasExtension(it, extension) { it.toString().toLowerCase().endsWith(extension.toLowerCase())