1
1
#! /usr/bin/env python
2
2
# Originally written by Barry Warsaw <bwarsaw@python.org>
3
+ #
4
+ # minimally patched to make it even more xgettext compatible
5
+ # by Peter Funk <pf@artcom-gmbh.de>
3
6
4
7
"""pygettext -- Python equivalent of xgettext(1)
5
8
35
38
[2] http://www.gnu.org/software/gettext/gettext.html
36
39
37
40
NOTE: pygettext attempts to be option and feature compatible with GNU xgettext
38
- where ever possible.
41
+ wherever possible. However, some options are still missing or are not fully
42
+ implemented.
39
43
40
44
Usage: pygettext [options] filename ...
41
45
45
49
--extract-all
46
50
Extract all strings
47
51
48
- -d default-domain
49
- --default-domain=default-domain
50
- Rename the default output file from messages.pot to default-domain.pot
52
+ -d name
53
+ --default-domain=name
54
+ Rename the default output file from messages.pot to name.pot
55
+
56
+ -E
57
+ --escape
58
+ replace non-ASCII characters with octal escape sequences.
59
+
60
+ -h
61
+ --help
62
+ print this help message and exit
51
63
52
64
-k [word]
53
65
--keyword[=word]
73
85
If style is omitted, Gnu is used. The style name is case
74
86
insensitive. By default, locations are included.
75
87
88
+ -o filename
89
+ --output=filename
90
+ Rename the default output file from messages.pot to filename.
91
+
92
+ -p dir
93
+ --output-dir=dir
94
+ Output files will be placed in directory dir.
95
+
76
96
-v
77
97
--verbose
78
98
Print the names of the files being processed.
79
99
80
- --help
81
- -h
82
- print this help message and exit
100
+ -V
101
+ --version
102
+ Print the version of pygettext and exit.
103
+
104
+ -w columns
105
+ --width=columns
106
+ Set width of output to columns.
107
+
108
+ -x filename
109
+ --exclude-file=filename
110
+ Specify a file that contains a list of strings that are not to be
111
+ extracted from the input files. Each string to be excluded must
112
+ appear on a line by itself in the file.
83
113
84
114
"""
85
115
90
120
import getopt
91
121
import tokenize
92
122
93
- __version__ = '0.2 '
123
+ __version__ = '1.0 '
94
124
95
125
96
126
97
127
# for selftesting
98
- def _ (s ): return s
128
+ try :
129
+ import fintl
130
+ _ = fintl .gettext
131
+ except ImportError :
132
+ def _ (s ): return s
99
133
100
134
101
135
# The normal pot-file header. msgmerge and EMACS' po-mode work better if
@@ -125,21 +159,31 @@ def usage(code, msg=''):
125
159
print msg
126
160
sys .exit (code )
127
161
162
+
128
163
129
164
escapes = []


def make_escapes(pass_iso8859):
    """(Re)build the module-level ``escapes`` translation table.

    After the call, ``escapes[n]`` holds the text that is written to the
    .pot file for the character whose ordinal is ``n`` (0..255).

    pass_iso8859 -- when true, characters 160..254 (the printable upper
        half of the ISO-8859 character sets) pass through unchanged, so a
        word containing o-umlaut is not written with "\\366" in it.  When
        false, every character outside the printable ASCII range 32..126
        is replaced by a three-digit octal escape.

    Backslash, tab, carriage return, newline and double quote always get
    their symbolic C-style escapes, regardless of pass_iso8859.

    The table is rebuilt from scratch on every call, so calling this
    function more than once is safe.
    """
    global escapes
    # Folding the ordinal modulo 128 makes 160..254 look like 32..126 for
    # the printable test.  Only the *test* uses the folded value: the table
    # entry itself must be built from the real ordinal, otherwise a high-bit
    # character would be emitted as its unrelated 7-bit counterpart.
    if pass_iso8859:
        mod = 128
    else:
        mod = 256
    table = []
    for i in range(256):
        if 32 <= (i % mod) <= 126:
            table.append(chr(i))
        else:
            table.append("\\%03o" % i)
    table[ord('\\')] = '\\\\'
    table[ord('\t')] = '\\t'
    table[ord('\r')] = '\\r'
    table[ord('\n')] = '\\n'
    table[ord('\"')] = '\\"'
    # Replace the contents in place so that the list always has exactly 256
    # entries and any existing reference to ``escapes`` sees the new table.
    escapes[:] = table

183
+
141
184
142
185
def escape (s ):
186
+ global escapes
143
187
s = list (s )
144
188
for i in range (len (s )):
145
189
s [i ] = escapes [ord (s [i ])]
@@ -200,12 +244,13 @@ def __openseen(self, ttype, tstring, lineno):
200
244
# were no strings inside _(), then just ignore this entry.
201
245
if self .__data :
202
246
msg = string .join (self .__data , '' )
203
- entry = (self .__curfile , self .__lineno )
204
- linenos = self .__messages .get (msg )
205
- if linenos is None :
206
- self .__messages [msg ] = [entry ]
207
- else :
208
- linenos .append (entry )
247
+ if not msg in self .__options .toexclude :
248
+ entry = (self .__curfile , self .__lineno )
249
+ linenos = self .__messages .get (msg )
250
+ if linenos is None :
251
+ self .__messages [msg ] = [entry ]
252
+ else :
253
+ linenos .append (entry )
209
254
self .__state = self .__waiting
210
255
elif ttype == tokenize .STRING :
211
256
self .__data .append (safe_eval (tstring ))
@@ -222,20 +267,30 @@ def write(self, fp):
222
267
sys .stdout = fp
223
268
# The time stamp in the header doesn't have the same format
224
269
# as that generated by xgettext...
225
- print pot_header % {'time' : timestamp , 'version' :__version__ }
270
+ print pot_header % {'time' : timestamp , 'version' : __version__ }
226
271
for k , v in self .__messages .items ():
227
- for filename , lineno in v :
228
- # location comments are different b/w Solaris and GNU
229
- d = {'filename' : filename ,
230
- 'lineno' : lineno }
231
- if options .location == options .SOLARIS :
272
+ # location comments are different b/w Solaris and GNU:
273
+ if options .location == options .SOLARIS :
274
+ for filename , lineno in v :
275
+ d = {'filename' : filename , 'lineno' : lineno }
232
276
print _ ('# File: %(filename)s, line: %(lineno)d' ) % d
233
- elif options .location == options .GNU :
234
- print _ ('#: %(filename)s:%(lineno)d' ) % d
277
+ elif options .location == options .GNU :
278
+ # fit as many locations on one line, as long as the
279
+ # resulting line length doesn't exceed 'options.width'
280
+ locline = '#:'
281
+ for filename , lineno in v :
282
+ d = {'filename' : filename , 'lineno' : lineno }
283
+ s = _ (' %(filename)s:%(lineno)d' ) % d
284
+ if len (locline ) + len (s ) <= options .width :
285
+ locline = locline + s
286
+ else :
287
+ print locline
288
+ locline = "#:" + s
289
+ if len (locline ) > 2 :
290
+ print locline
235
291
# TBD: sorting, normalizing
236
292
print 'msgid' , normalize (k )
237
- print 'msgstr ""'
238
- print
293
+ print 'msgstr ""\n '
239
294
finally :
240
295
sys .stdout = sys .__stdout__
241
296
@@ -245,9 +300,11 @@ def main():
245
300
try :
246
301
opts , args = getopt .getopt (
247
302
sys .argv [1 :],
248
- 'k:d:n:hv' ,
249
- ['keyword' , 'default-domain' , 'help' ,
250
- 'add-location=' , 'no-location' , 'verbose' ])
303
+ 'ad:Ehk:n:o:p:Vvw:x:' ,
304
+ ['extract-all' , 'default-domain' , 'escape' , 'help' , 'keyword' ,
305
+ 'add-location' , 'no-location' , 'output=' , 'output-dir=' ,
306
+ 'verbose' , 'version' , 'width=' , 'exclude-file=' ,
307
+ ])
251
308
except getopt .error , msg :
252
309
usage (1 , msg )
253
310
@@ -257,10 +314,15 @@ class Options:
257
314
GNU = 1
258
315
SOLARIS = 2
259
316
# defaults
317
+ extractall = 0 # FIXME: currently this option has no effect at all.
318
+ escape = 0
260
319
keywords = []
320
+ outpath = ''
261
321
outfile = 'messages.pot'
262
322
location = GNU
263
323
verbose = 0
324
+ width = 78
325
+ excludefilename = ''
264
326
265
327
options = Options ()
266
328
locations = {'gnu' : options .GNU ,
@@ -271,12 +333,16 @@ class Options:
271
333
for opt , arg in opts :
272
334
if opt in ('-h' , '--help' ):
273
335
usage (0 )
336
+ elif opt in ('-a' , '--extract-all' ):
337
+ options .extractall = 1
338
+ elif opt in ('-d' , '--default-domain' ):
339
+ options .outfile = arg + '.pot'
340
+ elif opt in ('-E' , '--escape' ):
341
+ options .escape = 1
274
342
elif opt in ('-k' , '--keyword' ):
275
343
if arg is None :
276
344
default_keywords = []
277
345
options .keywords .append (arg )
278
- elif opt in ('-d' , '--default-domain' ):
279
- options .outfile = arg + '.pot'
280
346
elif opt in ('-n' , '--add-location' ):
281
347
if arg is None :
282
348
arg = 'gnu'
@@ -287,12 +353,44 @@ class Options:
287
353
usage (1 , _ ('Invalid value for --add-location: %(arg)s' ) % d )
288
354
elif opt in ('--no-location' ,):
289
355
options .location = 0
356
+ elif opt in ('-o' , '--output' ):
357
+ options .outfile = arg
358
+ elif opt in ('-p' , '--output-dir' ):
359
+ options .outpath = arg
290
360
elif opt in ('-v' , '--verbose' ):
291
361
options .verbose = 1
362
+ elif opt in ('-V' , '--version' ):
363
+ print _ ('pygettext.py (xgettext for Python) %s' ) % __version__
364
+ sys .exit (0 )
365
+ elif opt in ('-w' , '--width' ):
366
+ try :
367
+ options .width = int (arg )
368
+ except ValueError :
369
+ d = {'arg' :arg }
370
+ usage (1 , _ ('Invalid value for --width: %(arg)s, must be int' )
371
+ % d )
372
+ elif opt in ('-x' , '--exclude-file' ):
373
+ options .excludefilename = arg
374
+
375
+ # calculate escapes
376
+ make_escapes (options .escapes )
292
377
293
378
# calculate all keywords
294
379
options .keywords .extend (default_keywords )
295
380
381
+ # initialize list of strings to exclude
382
+ if options .excludefilename :
383
+ try :
384
+ fp = open (options .excludefilename )
385
+ options .toexclude = fp .readlines ()
386
+ fp .close ()
387
+ except IOError :
388
+ sys .stderr .write (_ ("Can't read --exclude-file: %s" ) %
389
+ options .excludefilename )
390
+ sys .exit (1 )
391
+ else :
392
+ options .toexclude = []
393
+
296
394
# slurp through all the files
297
395
eater = TokenEater (options )
298
396
for filename in args :
@@ -303,6 +401,8 @@ class Options:
303
401
tokenize .tokenize (fp .readline , eater )
304
402
fp .close ()
305
403
404
+ if options .outpath :
405
+ options .outfile = os .path .join (options .outpath , options .outfile )
306
406
fp = open (options .outfile , 'w' )
307
407
eater .write (fp )
308
408
fp .close ()
0 commit comments