forked from gc3-uzh-ch/python-course
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpart05.tex
363 lines (292 loc) · 10.1 KB
/
part05.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
\documentclass[english,serif,mathserif,xcolor=pdftex,dvipsnames,table]{beamer}
\usetheme[informal]{s3it}
\usepackage{s3it}
\title[Part 5]{%
String manipulation and file I/O
}
\author[S3IT]{%
S3IT: Services and Support for Science IT, \\
University of Zurich
}
\date{June~23--24, 2014}
\begin{document}
% title frame
\maketitle
\begin{frame}[fragile]
\frametitle{File I/O, I}
\begin{describe}{\ttfamily stream = open(path,mode)}
Return a Python \texttt{file} object for reading or writing the
file located at \texttt{path}. Mode is one of `\texttt{r}',
`\texttt{w}' or `\texttt{a}' for reading, writing (truncates on open), appending.
You can add a `\texttt{+}' character to enable read+write (other
effects being the same).
\end{describe}
\begin{describe}{\ttfamily \emph{stream}.close()}
Close an open file.
\end{describe}
\begin{describe}{\ttfamily \textbf{for} line \textbf{in} stream:}
Loop over lines in the file one by one.
\end{describe}
\begin{references}
\url{http://docs.python.org/library/stdtypes.html#file-objects}
\end{references}
\end{frame}
\begin{frame}[fragile]
\frametitle{File I/O, II}
The \lstinline|read(n)| method can be used to read \emph{at most}
\lstinline|n| bytes from a file-like object:
\begin{lstlisting}
>>> s = stream.read(2)
>>> print(s)
py
\end{lstlisting}
If \lstinline|n| is omitted, \texttt{read()} reads until end-of-file.
\begin{references}
\url{http://docs.python.org/library/stdtypes.html#file-objects}
\end{references}
\end{frame}
\begin{frame}[fragile]
\begin{exercise}
Write a function \lstinline|cat(filename)| that prints the whole contents of a file.
\+
Test it with the
\href{https://raw.github.com/gc3-uzh-ch/python-course/master/welcome.py}{welcome.py}
file:
\begin{lstlisting}
>>> cat('welcome.py')
#! /usr/bin/env python
print ("Welcome to Python!")
\end{lstlisting}
\end{exercise}
\+
\begin{exercise}
Write a function \lstinline|load_data(filename)| that reads a file
containing one integer number per line, and returns a list of the
integer values.
\+
Test it with the
\href{https://raw.github.com/gc3-uzh-ch/python-course/master/values.dat}{values.dat}
file:
\begin{lstlisting}
>>> load_data('values.dat')
[299850, 299740, 299900, 300070, 299930]
\end{lstlisting}
\end{exercise}
\end{frame}
\begin{frame}[fragile]
\frametitle{List comprehensions, I}
Python has a better and more compact syntax for \emph{filtering} elements
of a list and/or \emph{applying} a function to them.
\+
The previous example:
\begin{lstlisting}
data = []
for num in open('values.dat').readlines():
data.append(int(num))
\end{lstlisting}
\+
can be written using \textit{list comprehension}:
\begin{python}
data = [ int(line) for line in open('values.dat') ]
\end{python}
\end{frame}
\begin{frame}[fragile]
\frametitle{List comprehensions, II}
\def\e{\ttfamily\itshape}
The general syntax of a list comprehension is:
\begin{python}
~\bf[~ ~\e expr~ for ~\e var~ in ~\e iterable~ if ~\e condition~ ~\bf]~
\end{python}
where:
\begin{description}
\item[\e expr] is any Python expression;
\item[\e iterable] is a (generalized) sequence;
\item[\e condition] is a boolean expression, depending on
{\e var};
\item[\e var] is a variable that will be bound in turn to each item
in {\e iterable} which satisfies {\e condition}.
\end{description}
\+ \textit{Create a new list, and for each \textbf{var} in the
sequence \textbf{iterable}, if \textbf{condition} is true then add
\textbf{expr} to the list.}
\end{frame}
\begin{frame}[fragile]
\begin{exercise}
Write a function called \texttt{load\_data2(filename, bound)}
that, \textit{using comprehensions}, reads a file containing one
integer number per line, and returns a list of the integer values
\textit{less than} \texttt{bound}.
\+
Test it with the
\href{https://raw.github.com/gc3-uzh-ch/python-course/master/values.dat}{values.dat}
file:
\begin{python}
>>> load_data2('values.dat', 300000)
[299850, 299740, 299900, 299930]
\end{python}
\end{exercise}
\end{frame}
\begin{frame}[fragile]
\frametitle{Operations on strings, I}
\begin{describe}{%
\lstinline|s.capitalize()|,
\lstinline|s.lower()|,
\lstinline|s.upper()|}
Return a \emph{copy} of the string capitalized / turned all lowercase /
turned all uppercase.
\end{describe}
\begin{describe}{\lstinline|s.split(t)|}
Split \texttt{s} at every occurrence of \texttt{t} and return a list
of parts. If \texttt{t} is omitted, split on whitespace.
\end{describe}
\begin{describe}{\lstinline|s.startswith(t)|,
\lstinline|s.endswith(t)|}
Return \texttt{True} if \texttt{t} is the initial/final substring
of \texttt{s}.
\end{describe}
\begin{references}
\url{http://docs.python.org/library/stdtypes.html#string-methods}
\end{references}
\end{frame}
\begin{frame}[fragile]
\frametitle{Operations on strings, II}
\begin{describe}{\lstinline|s.replace(old, new)|}
Return a \emph{copy} of string \texttt{s} with all occurrences of
substring \texttt{old} replaced by \texttt{new}.
\end{describe}
\begin{describe}{%
\lstinline|s.lstrip()|,
\lstinline|s.rstrip()|,
\lstinline|s.strip()|}
Return a \emph{copy} of the string with the leading (resp.\ trailing,
resp.\ leading \emph{and} trailing) whitespace removed.
\end{describe}
\begin{references}
\url{http://docs.python.org/library/stdtypes.html#string-methods}
\end{references}
\end{frame}
\begin{frame}[fragile]
\begin{exercise}
Write a program that reads the
\href{https://raw.github.com/gc3-uzh-ch/python-course/master/euro.csv}{euro.csv}
file and populates a dictionary from it: currency names (first
column) are the dictionary keys, conversion rates (second column)
are the dictionary values.
\end{exercise}
\end{frame}
\begin{frame}[fragile]
\begin{exercise}
Write a function \lstinline|wordcount(filename)| that reads a text
file and returns a dictionary mapping each word to the number of
occurrences (disregarding case) of that word in the text. Test it with the
\href{https://raw.github.com/gc3-uzh-ch/python-course/master/lorem_ipsum.txt}{lorem\_ipsum.txt} file:
\begin{lstlisting}
>>> wordcount('lorem_ipsum.txt')
{'and': 3, 'model': 1, 'more-or-less': 1,
'letters': 1, ...
\end{lstlisting}
\+ For the purposes of this
exercise, a ``word'' is defined as a sequence of letters and the
character ``-'', i.e., ``e-mail'' and ``more-or-less'' should both
be counted as a single word.
\+ You might want to have a look at the
\href{http://docs.python.org/2/library/string.html}{string}
module, for pre-defined sets of alphabetic and punctuation
characters.
\end{exercise}
\end{frame}
\begin{frame}[fragile]
\frametitle{Filesystem operations, I}
\small
These functions are available from the \texttt{os} module.
\begin{describe}{\lstinline|os.getcwd()|, \lstinline|os.chdir(path)|}
Return the path to the current working directory /
Change the current working directory to \texttt{path}.
\end{describe}
\begin{describe}{\lstinline|os.listdir(dir)|}
Return the list of entries in directory \texttt{dir} (omitting
`\texttt{.}' and `\texttt{..}').
\end{describe}
\begin{describe}{\lstinline|os.mkdir(path)|}
Create a directory; fails if the directory already exists.
Assumes that all parent directories exist already.
\end{describe}
% \begin{describe}{\lstinline|os.makedirs(path)|}
% Create a directory; no-op if the directory already exists.
% Creates all the intermediate-level directories needed to contain
% the leaf.
% \end{describe}
\begin{describe}{\lstinline|os.rename(old,new)|}
Rename a file or directory from \texttt{old} to \texttt{new}.
\end{describe}
\begin{references}
\url{http://docs.python.org/library/os.html}
\end{references}
\end{frame}
\begin{frame}[fragile]
\frametitle{Filesystem operations, II}
These functions are available from the \texttt{os.path} module.
\begin{describe}{\lstinline|os.path.exists(path)|, \lstinline|os.path.isdir(path)|, \lstinline|os.path.isfile(path)|}
Return \texttt{True} if \texttt{path} exists / is a directory / is
a regular file.
\end{describe}
\begin{describe}{\lstinline|os.path.basename(path)|,
\lstinline|os.path.dirname(path)|}
Return the base name (the part after the last `\texttt{/}'
character) or the directory name (the part before the last
`\texttt{/}' character).
\end{describe}
\begin{describe}{\lstinline|os.path.abspath(path)|}
Make \texttt{path} absolute (i.e., start with a \texttt{/}).
\end{describe}
\begin{references}
\url{http://docs.python.org/library/os.path.html}
\end{references}
\end{frame}
\begin{frame}[fragile]
\frametitle{Command line arguments}
The \texttt{sys} module provides access to some variables used or
maintained by the interpreter.
One such variable is \texttt{sys.argv}, a list containing the arguments
passed on the command line.
\+
\textbf{Example:} This is a simple script that
prints the command line arguments used to invoke it:
\begin{lstlisting}
import sys
print(sys.argv)
\end{lstlisting}
\+
Calling the script as:
\begin{lstlisting}
$ python script.py foo bar
\end{lstlisting}
yields the following result:
\begin{lstlisting}
['script.py', 'foo', 'bar']
\end{lstlisting}
\end{frame}
\begin{frame}[fragile]
\begin{exercise}\emph{(Homework)}
Write a Python program \texttt{rename.py} with the following
command-line:
\begin{lstlisting}[language=sh]
python rename.py EXT1 EXT2 DIR [DIR ...]
\end{lstlisting}
where:
\begin{description}
\item[EXT1, EXT2] are file name extensions (without the leading
dot), e.g., \texttt{jpg} and \texttt{jpeg}.
\item[DIR] is a directory path; several directory names can
be given on the command line.
\end{description}
The \texttt{rename.py} command should rename all files in each
directory \texttt{DIR} whose names end with extension \texttt{EXT1},
so that they end with extension \texttt{EXT2} instead.
\end{exercise}
\end{frame}
\end{document}
%%% Local Variables:
%%% mode: latex
%%% TeX-master: t
%%% End: