Skip to content

Commit c8f0892

Browse files
committed
Changes submitted by Peter Funk (some fixes/additions by B.Warsaw) to
make pygettext more compatible with GNU xgettext, specifically: Added -E/--escape for allowing pass-thru of iso8859-1 characters above 7 bits. Added -o/--output option for renaming the output file from messages.pot (there's overlap with -d/--default-domain, but GNU xgettext has them both). Added -p/--output-dir for specifying the output directory for messages.pot. Added -V/--version for printing the version number. Added -w/--width for specifying the output page width (this is because now pygettext, like GNU xgettext will put several locations on the same line to cut down on vertical space). Added -x/--exclude-file for specifying a list of strings that are not to be extracted from the input files. Bumped version number to 1.0 Try to import fintl and use fintl.gettext as _ if available. Fall back is to use identity definition of _(). Moved the escape creation to a function make_escapes() so that its behavior can be controlled by the -E option. __openseen(): Support the -x option. write(): Support -w option and vertical space preserving feature. main(): Support new options.
1 parent abc5216 commit c8f0892

File tree

1 file changed

+140
-40
lines changed

1 file changed

+140
-40
lines changed

Tools/i18n/pygettext.py

+140-40
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
#! /usr/bin/env python
22
# Originally written by Barry Warsaw <bwarsaw@python.org>
3+
#
4+
# minimally patched to make it even more xgettext compatible
5+
# by Peter Funk <pf@artcom-gmbh.de>
36

47
"""pygettext -- Python equivalent of xgettext(1)
58
@@ -35,7 +38,8 @@
3538
[2] http://www.gnu.org/software/gettext/gettext.html
3639
3740
NOTE: pygettext attempts to be option and feature compatible with GNU xgettext
38-
where ever possible.
41+
where ever possible. However some options are still missing or are not fully
42+
implemented.
3943
4044
Usage: pygettext [options] filename ...
4145
@@ -45,9 +49,17 @@
4549
--extract-all
4650
Extract all strings
4751
48-
-d default-domain
49-
--default-domain=default-domain
50-
Rename the default output file from messages.pot to default-domain.pot
52+
-d name
53+
--default-domain=name
54+
Rename the default output file from messages.pot to name.pot
55+
56+
-E
57+
--escape
58+
replace non-ASCII characters with octal escape sequences.
59+
60+
-h
61+
--help
62+
print this help message and exit
5163
5264
-k [word]
5365
--keyword[=word]
@@ -73,13 +85,31 @@
7385
If style is omitted, Gnu is used. The style name is case
7486
insensitive. By default, locations are included.
7587
88+
-o filename
89+
--output=filename
90+
Rename the default output file from messages.pot to filename.
91+
92+
-p dir
93+
--output-dir=dir
94+
Output files will be placed in directory dir.
95+
7696
-v
7797
--verbose
7898
Print the names of the files being processed.
7999
80-
--help
81-
-h
82-
print this help message and exit
100+
-V
101+
--version
102+
Print the version of pygettext and exit.
103+
104+
-w columns
105+
--width=columns
106+
Set width of output to columns.
107+
108+
-x filename
109+
--exclude-file=filename
110+
Specify a file that contains a list of strings that are not to be
111+
extracted from the input files. Each string to be excluded must
112+
appear on a line by itself in the file.
83113
84114
"""
85115

@@ -90,12 +120,16 @@
90120
import getopt
91121
import tokenize
92122

93-
__version__ = '0.2'
123+
__version__ = '1.0'
94124

95125

96126

97127
# for selftesting
98-
def _(s): return s
128+
try:
129+
import fintl
130+
_ = fintl.gettext
131+
except ImportError:
132+
def _(s): return s
99133

100134

101135
# The normal pot-file header. msgmerge and EMACS' po-mode work better if
@@ -125,21 +159,31 @@ def usage(code, msg=''):
125159
print msg
126160
sys.exit(code)
127161

162+
128163

129164
escapes = []
130-
for i in range(256):
131-
if i < 32 or i > 127:
132-
escapes.append("\\%03o" % i)
133-
else:
134-
escapes.append(chr(i))
135165

136-
escapes[ord('\\')] = '\\\\'
137-
escapes[ord('\t')] = '\\t'
138-
escapes[ord('\r')] = '\\r'
139-
escapes[ord('\n')] = '\\n'
140-
escapes[ord('\"')] = '\\"'
166+
def make_escapes(pass_iso8859):
    """Build the global character escape table used by escape().

    The table has 256 entries, one per character code: entry i holds the
    text that is written to the output for the character chr(i).

    pass_iso8859 -- if true, characters whose code folded onto the lower
        128 values lies in 32..126 (i.e. codes 160..254) are passed through
        unchanged instead of being escaped.  If false, every character
        outside the 32..126 range is replaced by a three digit octal escape
        sequence.

    Backslash, tab, carriage return, newline and double quote always get
    their symbolic escapes.  The global 'escapes' is rebound to a fresh
    table on every call, so calling this function again replaces the
    previous table instead of growing it.
    """
    global escapes
    if pass_iso8859:
        # Allow iso-8859 characters to pass through so that e.g. 'msgid
        # "Höhe"' would not result in 'msgid "H\366he"'.  Only the range
        # test below folds the upper half onto the lower one; the table
        # entry itself must still be built from the real character code.
        mod = 128
    else:
        # Escape any character outside the 32..126 range.
        mod = 256
    table = []
    for i in range(256):
        if 32 <= (i % mod) <= 126:
            table.append(chr(i))
        else:
            table.append("\\%03o" % i)
    # These characters have dedicated escapes in .pot files.
    table[ord('\\')] = '\\\\'
    table[ord('\t')] = '\\t'
    table[ord('\r')] = '\\r'
    table[ord('\n')] = '\\n'
    table[ord('\"')] = '\\"'
    escapes = table
183+
141184

142185
def escape(s):
186+
global escapes
143187
s = list(s)
144188
for i in range(len(s)):
145189
s[i] = escapes[ord(s[i])]
@@ -200,12 +244,13 @@ def __openseen(self, ttype, tstring, lineno):
200244
# were no strings inside _(), then just ignore this entry.
201245
if self.__data:
202246
msg = string.join(self.__data, '')
203-
entry = (self.__curfile, self.__lineno)
204-
linenos = self.__messages.get(msg)
205-
if linenos is None:
206-
self.__messages[msg] = [entry]
207-
else:
208-
linenos.append(entry)
247+
if not msg in self.__options.toexclude:
248+
entry = (self.__curfile, self.__lineno)
249+
linenos = self.__messages.get(msg)
250+
if linenos is None:
251+
self.__messages[msg] = [entry]
252+
else:
253+
linenos.append(entry)
209254
self.__state = self.__waiting
210255
elif ttype == tokenize.STRING:
211256
self.__data.append(safe_eval(tstring))
@@ -222,20 +267,30 @@ def write(self, fp):
222267
sys.stdout = fp
223268
# The time stamp in the header doesn't have the same format
224269
# as that generated by xgettext...
225-
print pot_header % {'time': timestamp, 'version':__version__}
270+
print pot_header % {'time': timestamp, 'version': __version__}
226271
for k, v in self.__messages.items():
227-
for filename, lineno in v:
228-
# location comments are different b/w Solaris and GNU
229-
d = {'filename': filename,
230-
'lineno': lineno}
231-
if options.location == options.SOLARIS:
272+
# location comments are different b/w Solaris and GNU:
273+
if options.location == options.SOLARIS:
274+
for filename, lineno in v:
275+
d = {'filename': filename, 'lineno': lineno}
232276
print _('# File: %(filename)s, line: %(lineno)d') % d
233-
elif options.location == options.GNU:
234-
print _('#: %(filename)s:%(lineno)d') % d
277+
elif options.location == options.GNU:
278+
# fit as many locations on one line, as long as the
279+
# resulting line length doesn't exceed 'options.width'
280+
locline = '#:'
281+
for filename, lineno in v:
282+
d = {'filename': filename, 'lineno': lineno}
283+
s = _(' %(filename)s:%(lineno)d') % d
284+
if len(locline) + len(s) <= options.width:
285+
locline = locline + s
286+
else:
287+
print locline
288+
locline = "#:" + s
289+
if len(locline) > 2:
290+
print locline
235291
# TBD: sorting, normalizing
236292
print 'msgid', normalize(k)
237-
print 'msgstr ""'
238-
print
293+
print 'msgstr ""\n'
239294
finally:
240295
sys.stdout = sys.__stdout__
241296

@@ -245,9 +300,11 @@ def main():
245300
try:
246301
opts, args = getopt.getopt(
247302
sys.argv[1:],
248-
'k:d:n:hv',
249-
['keyword', 'default-domain', 'help',
250-
'add-location=', 'no-location', 'verbose'])
303+
'ad:Ehk:n:o:p:Vvw:x:',
304+
['extract-all', 'default-domain', 'escape', 'help', 'keyword',
305+
'add-location', 'no-location', 'output=', 'output-dir=',
306+
'verbose', 'version', 'width=', 'exclude-file=',
307+
])
251308
except getopt.error, msg:
252309
usage(1, msg)
253310

@@ -257,10 +314,15 @@ class Options:
257314
GNU = 1
258315
SOLARIS = 2
259316
# defaults
317+
extractall = 0 # FIXME: currently this option has no effect at all.
318+
escape = 0
260319
keywords = []
320+
outpath = ''
261321
outfile = 'messages.pot'
262322
location = GNU
263323
verbose = 0
324+
width = 78
325+
excludefilename = ''
264326

265327
options = Options()
266328
locations = {'gnu' : options.GNU,
@@ -271,12 +333,16 @@ class Options:
271333
for opt, arg in opts:
272334
if opt in ('-h', '--help'):
273335
usage(0)
336+
elif opt in ('-a', '--extract-all'):
337+
options.extractall = 1
338+
elif opt in ('-d', '--default-domain'):
339+
options.outfile = arg + '.pot'
340+
elif opt in ('-E', '--escape'):
341+
options.escape = 1
274342
elif opt in ('-k', '--keyword'):
275343
if arg is None:
276344
default_keywords = []
277345
options.keywords.append(arg)
278-
elif opt in ('-d', '--default-domain'):
279-
options.outfile = arg + '.pot'
280346
elif opt in ('-n', '--add-location'):
281347
if arg is None:
282348
arg = 'gnu'
@@ -287,12 +353,44 @@ class Options:
287353
usage(1, _('Invalid value for --add-location: %(arg)s') % d)
288354
elif opt in ('--no-location',):
289355
options.location = 0
356+
elif opt in ('-o', '--output'):
357+
options.outfile = arg
358+
elif opt in ('-p', '--output-dir'):
359+
options.outpath = arg
290360
elif opt in ('-v', '--verbose'):
291361
options.verbose = 1
362+
elif opt in ('-V', '--version'):
363+
print _('pygettext.py (xgettext for Python) %s') % __version__
364+
sys.exit(0)
365+
elif opt in ('-w', '--width'):
366+
try:
367+
options.width = int(arg)
368+
except ValueError:
369+
d = {'arg':arg}
370+
usage(1, _('Invalid value for --width: %(arg)s, must be int')
371+
% d)
372+
elif opt in ('-x', '--exclude-file'):
373+
options.excludefilename = arg
374+
375+
# calculate escapes
376+
make_escapes(options.escapes)
292377

293378
# calculate all keywords
294379
options.keywords.extend(default_keywords)
295380

381+
# initialize list of strings to exclude
382+
if options.excludefilename:
383+
try:
384+
fp = open(options.excludefilename)
385+
options.toexclude = fp.readlines()
386+
fp.close()
387+
except IOError:
388+
sys.stderr.write(_("Can't read --exclude-file: %s") %
389+
options.excludefilename)
390+
sys.exit(1)
391+
else:
392+
options.toexclude = []
393+
296394
# slurp through all the files
297395
eater = TokenEater(options)
298396
for filename in args:
@@ -303,6 +401,8 @@ class Options:
303401
tokenize.tokenize(fp.readline, eater)
304402
fp.close()
305403

404+
if options.outpath:
405+
options.outfile = os.path.join(options.outpath, options.outfile)
306406
fp = open(options.outfile, 'w')
307407
eater.write(fp)
308408
fp.close()

0 commit comments

Comments
 (0)