Index: lib/mako/exceptions.py =================================================================== --- lib/mako/exceptions.py (revision 258) +++ lib/mako/exceptions.py (working copy) @@ -6,7 +6,7 @@ """exception classes""" -import traceback, sys, re +import codecs, parser, re, sys, traceback class MakoException(Exception): pass @@ -68,7 +68,12 @@ if self.error is None: self.error = t if isinstance(self.error, CompileException) or isinstance(self.error, SyntaxException): - self.source = file(self.error.filename).read() + source_file = file(self.error.filename) + self.source = source_file.read() + self.source_encoding = _parse_encoding(source_file) + if self.source_encoding is None: + self.source_encoding = sys.getdefaultencoding() + source_file.close() self.lineno = self.error.lineno self._has_source = True self.reverse_records = [r for r in self.records] @@ -188,7 +193,7 @@

Error !

# Patch hunk "@@ -188,7 +193,7 @@" (HTML error template), carried over from the
# diff with its -/+ markers.  Line breaks and indentation were lost in the
# source text, so the lines are listed flat:
#
#     <%
#     tback = RichTraceback()
#   - src = tback.source
#   + src = tback.source.decode(tback.source_encoding, 'replace')
#     line = tback.lineno
#     if src:
#     lines = src.split('\n')
#
# NOTE(review): the only assignment of ``source_encoding`` visible in this
# patch sits inside the CompileException/SyntaxException branch -- confirm
# that RichTraceback also defines it (and a decodable ``source``) on every
# other path, otherwise the changed template line fails while rendering the
# error page.
#
# Patch hunk "@@ -221,3 +226,52 @@": everything below is appended after the
# closing `""")` of the template.

# Regexp to match the python magic encoding line (PEP 263).  Compiled with
# re.VERBOSE, so the blanks in the pattern are ignored and ``#`` must be
# escaped.  ``.*?`` is non-greedy so that the *first* ``coding`` on the line
# wins, as it does for the interpreter.
_PYTHON_MAGIC_COMMENT_re = re.compile(
    r'[ \t\f]* \# .*? coding[=:][ \t]*([-_.a-zA-Z0-9]+)',
    re.VERBOSE)


def _magic_comment_match(line):
    """Match ``line`` against ``_PYTHON_MAGIC_COMMENT_re``.

    Byte strings are decoded as latin-1 first (every byte maps to exactly one
    character), so the same text pattern works for byte and text input.
    """
    if isinstance(line, type(codecs.BOM_UTF8)):
        line = line.decode('latin-1')
    return _PYTHON_MAGIC_COMMENT_re.match(line)


def _names_utf8(encoding):
    """Tell whether ``encoding`` is a spelling of utf-8 (``UTF_8``, ``utf-8-sig``...)."""
    normalized = encoding.lower().replace('_', '-')
    return normalized == 'utf-8' or normalized.startswith('utf-8-')


def _parse_encoding(fp):
    """Deduce the encoding of a source file from magic comment.

    It does this in the same way as the `Python interpreter`__

    .. __: http://docs.python.org/ref/encodings.html

    The ``fp`` argument should be a seekable file object.  Its read position
    is restored before returning, whatever the outcome.

    Returns ``'utf_8'`` when the file starts with a UTF-8 byte-order-mark,
    otherwise the encoding named by a magic comment on the first line (or on
    the second line when the first is a complete python statement), otherwise
    ``None``.

    Raises ``SyntaxError`` when the file has a UTF-8 byte-order-mark *and* a
    magic comment naming a different encoding; a comment that merely repeats
    utf-8 is accepted.
    """
    pos = fp.tell()
    fp.seek(0)
    try:
        line1 = fp.readline()
        if isinstance(line1, type(codecs.BOM_UTF8)):
            # Byte input: the BOM is the three-byte sequence.
            has_bom = line1.startswith(codecs.BOM_UTF8)
            if has_bom:
                line1 = line1[len(codecs.BOM_UTF8):]
        else:
            # Text input: a decoded BOM shows up as U+FEFF.
            has_bom = line1.startswith(u'\ufeff')
            if has_bom:
                line1 = line1[1:]

        m = _magic_comment_match(line1)
        if not m:
            try:
                # Probe whether line1 is a complete statement on its own.
                # The built-in compile() is used because it exists on every
                # python version; the result is thrown away.
                compile(line1, '<magic comment probe>', 'exec', 0, 1)
            except (SyntaxError, TypeError, ValueError):
                # Either it's a real syntax error, in which case the source
                # is not valid python source, or line2 is a continuation of
                # line1, in which case we don't want to scan line2 for a magic
                # comment.  (TypeError/ValueError cover sources compile()
                # rejects outright, e.g. embedded null bytes.)
                pass
            else:
                m = _magic_comment_match(fp.readline())

        declared = None
        if m:
            # str() keeps the return type a native string on every version.
            declared = str(m.group(1))

        if has_bom:
            if declared is not None and not _names_utf8(declared):
                raise SyntaxError(
                    "python refuses to compile code with both a UTF8"
                    " byte-order-mark and a magic encoding comment"
                    " naming another encoding (%s)" % declared)
            return 'utf_8'
        return declared
    finally:
        fp.seek(pos)