forked from chernan/FAIR_bioinfo_docs
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfromBash2Snakemake.tex
149 lines (147 loc) · 6.09 KB
/
fromBash2Snakemake.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
%-------------------------------------------
\begin{frame}[containsverbatim]
\frametitle{Snakemake point}
%-------------------------------------------
\begin{block}{So far, we've seen:}
\begin{itemize}
\item the rule and the workflow concepts, the snakefile
\item how rules are linked thanks to input/output files and the first rule, the target rule
\item how to generalize the inputs of a rule using wildcards on filenames (and the \verb|expand| function)
\item how to redirect \verb|stdout| and \verb|stderr| streams (log)
\end{itemize}
\end{block}
\begin{block}{From now on, we will see some Snakemake options:}
\begin{itemize}
\item adding a configuration file
\item getting file names from the file system
\item using a conda environment
\item visualizing the workflow diagram, using the dry-run option, etc.
%\item \alert{the container directive ???}
%\item \alert{how to run snakemake on cluster ???}
\end{itemize}
\end{block}
\end{frame}
%-------------------------------------------
\begin{frame}[containsverbatim]
\frametitle{Using a configuration file}
%-------------------------------------------
Why use a configuration file?\\
To gather all the hard-coded values of the snakefile (paths to files, core numbers, parameter values, etc.) in a single place.
\begin{block}{How to?}
\begin{itemize}
\item create a file written in YAML or JSON (e.g.\ \verb|myConfig.yml|)
\item either i) run with the \verb|--configfile myConfig.yml| Snakemake option or ii) add \verb|configfile: "myConfig.yml"| at the beginning of the snakefile
\item in the snakefile, call the defined items with \verb|config["item1"]|
\end{itemize}
\end{block}
\end{frame}
%-------------------------------------------
\begin{frame}[containsverbatim]
\frametitle{Using a configuration file}
%-------------------------------------------
\begin{exampleblock}{myConfig.yml}
\begin{lstlisting}
dataDir:
Data/
\end{lstlisting}
\end{exampleblock}
\begin{exampleblock}{Replace ``Data/...'' in inputs with a config call:}
\begin{lstlisting}
rule fastqc:
input: config["dataDir"]+"{sample}.fastq.gz"
\end{lstlisting}
\end{exampleblock}
\begin{exampleblock}{And run with the configfile option:}
\begin{lstlisting}
snakemake -c1 -s ex1_o7.smk --configfile myConfig.yml
\end{lstlisting}
\end{exampleblock}
\end{frame}
%-------------------------------------------
\begin{frame}[containsverbatim]
\frametitle{File names from the file system}
%-------------------------------------------
To deduce the identifiers (e.g.\ IDs) of files in a directory, use the built-in \verb|glob_wildcards| function:
\begin{block}{Example of the \texttt{glob\_wildcards} function}
\begin{lstlisting}
IDs, = glob_wildcards("dirpath/{id}.fastq")
\end{lstlisting}
\end{block}
\verb|glob_wildcards()| matches the given pattern against the files present in the file system and thereby infers the values for all wildcards in the pattern (\verb|{id}| here).
\vfill
Don't forget the \textcolor{blue}{comma} after the name (left-hand side, \verb|IDs| here).
\end{frame}
%-------------------------------------------
\begin{frame}[containsverbatim]
\frametitle{Conda environment}
%-------------------------------------------
\begin{block}{Snakemake and conda}
In the practical exercise we will have one conda environment for executing the whole Snakemake workflow. \\
Snakemake also supports using explicit conda environments on a per-rule basis:
\begin{itemize}
\item add a \verb|conda:| directive in the rule definition:
\begin{lstlisting}
conda: "rule-specific-env.yml"
\end{lstlisting}
\item run Snakemake with the \verb|--use-conda| option
\end{itemize}
The specified environment will be created and activated on the fly by Snakemake and the rule will then be run in the conda environment.
\end{block}
\end{frame}
%-------------------------------------------
\begin{frame}[containsverbatim]
\frametitle{Snakemake DAG visualization}
%-------------------------------------------
\begin{block}{}
Snakemake can print the workflow as a graph that the \verb|dot| tool (from the Graphviz package) renders as a diagram of the complete workflow (\verb|--dag|) or of the rule dependencies (\verb|--rulegraph|):
\begin{lstlisting}
snakemake --dag -s ex1_o7.smk | dot -Tpng > ex1_o7_dag.png
snakemake --rulegraph -s ex1_o7.smk | dot -Tpng > ex1_o7_rule.png
\end{lstlisting}
\end{block}
\begin{columns}
\column{0.80\textwidth}
\begin{center}
\includegraphics[height=3.6cm]{03_workflow/images/ex1_o7_dag.png}
\end{center}
\column{0.20\textwidth}
\begin{center}
\includegraphics[height=3.6cm]{03_workflow/images/ex1_o7_rule.png}
\end{center}
\end{columns}
\end{frame}
%-------------------------------------------
\begin{frame}[containsverbatim]
\frametitle{Other useful options}
%-------------------------------------------
\begin{block}{Running options}
\begin{itemize}
\item dry-run, do not execute anything, display what would be done: \verb|-n --dryrun|
\item print the shell commands: \verb|-p --printshellcmds|
\item print the reason for each rule execution: \verb|-r --reason|
\item print a detailed summary of the files created by the workflow and their status: \verb|-D --detailed-summary|
\item limit the number of jobs in parallel: \verb|-j 1| (cores: \verb|-c 1|)
\item automatically create HTML reports (\verb|--report report.html|) containing runtime statistics, a visualization of the workflow topology, used software and data provenance information (requires the \verb|jinja2| package as a dependency)
\end{itemize}
\end{block}
\vfill
\href{https://snakemake.readthedocs.io/en/stable/executing/cli.html#all-option}{\textcolor{blue}{\underline{all Snakemake options}}}
\end{frame}
%-------------------------------------------
%\begin{frame}[containsverbatim]
%\frametitle{IFB cluster options}
%-------------------------------------------
%\begin{block}{interactive session}
%To use the option \verb|--cores=2|, don't forget to ask 2 CPUS for the interactive session (\verb|sinteractive --cpus=2|)
%\end{block}
%\begin{block}{cluster mode}
%To launch the snakefile on a cluster mode, just do:
%\begin{itemize}
% \item load the \verb|slurm-drmaa| module
% \item run snakemake with the \verb|--drmaa| option
%\begin{lstlisting}
%snakemake --drmaa --jobs=12 -s ex2_o6.smk --configfile ex2_o1.yml
%\end{lstlisting}
%\end{itemize}
%\end{block}
%\end{frame}