From 4464581bb63e9789e9ee231a8c8800be5f614743 Mon Sep 17 00:00:00 2001 From: Jonathan Coe Date: Thu, 3 Nov 2016 13:55:34 +0000 Subject: [PATCH] Support for Python 3 in libclang python bindings Summary: Python bindings tests now pass in Python 3. `map` in Python 3 is lazily evaluated so the method by which functions are registered needed updating. Strings are unicode in Python 3, not UTF-8, so I've tried to create a new c_types-like class (c_string_p) to automate the conversion. String conversions made explicit where required. Reviewers: eliben, nemanjai, skalinichev, compnerd Subscribers: mgorny, cfe-commits Differential Revision: https://reviews.llvm.org/D26082 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@285909 91177308-0d34-0410-b5e6-96231b3b80d8 --- bindings/python/clang/cindex.py | 145 +++++++++++------- .../tests/cindex/test_translation_unit.py | 8 +- 2 files changed, 97 insertions(+), 56 deletions(-) diff --git a/bindings/python/clang/cindex.py b/bindings/python/clang/cindex.py index 0974234bd3c..7353e86e431 100644 --- a/bindings/python/clang/cindex.py +++ b/bindings/python/clang/cindex.py @@ -64,6 +64,7 @@ from ctypes import * import collections +import sys import clang.enumerations @@ -73,6 +74,33 @@ # this by marshalling object arguments as void**. 
c_object_p = POINTER(c_void_p) +if sys.version_info[0] > 2: +# Python 3 strings are unicode, translate them to/from utf8 for C-interop +# Python 3 replaces xrange with range, we want xrange behaviour + xrange = range + + class c_string_p(c_char_p): + def __init__(self, p=None): + if type(p) == str: + p = p.encode("utf8") + super(c_char_p, self).__init__(p) + + def __str__(self): + return str(self.value) + + @property + def value(self): + if super(c_char_p, self).value is None: + return None + return super(c_char_p, self).value.decode("utf8") + + @classmethod + def from_param(cls, param): + return cls(param) +else: + c_string_p = c_char_p + + callbacks = {} ### Exception Classes ### @@ -147,7 +175,7 @@ def __get__(self, instance, instance_type=None): class _CXString(Structure): """Helper for transforming CXString results.""" - _fields_ = [("spelling", c_char_p), ("free", c_int)] + _fields_ = [("spelling", c_string_p), ("free", c_int)] def __del__(self): conf.lib.clang_disposeString(self) @@ -329,7 +357,7 @@ def location(self): @property def spelling(self): - return conf.lib.clang_getDiagnosticSpelling(self) + return str(conf.lib.clang_getDiagnosticSpelling(self)) @property def ranges(self): @@ -358,8 +386,8 @@ def __len__(self): def __getitem__(self, key): range = SourceRange() - value = conf.lib.clang_getDiagnosticFixIt(self.diag, key, - byref(range)) + value = str(conf.lib.clang_getDiagnosticFixIt(self.diag, key, + byref(range))) if len(value) == 0: raise IndexError @@ -392,12 +420,12 @@ def category_number(self): @property def category_name(self): """The string name of the category for this diagnostic.""" - return conf.lib.clang_getDiagnosticCategoryText(self) + return str(conf.lib.clang_getDiagnosticCategoryText(self)) @property def option(self): """The command-line option that enables this diagnostic.""" - return conf.lib.clang_getDiagnosticOption(self, None) + return str(conf.lib.clang_getDiagnosticOption(self, None)) @property def disable_option(self): @@ 
-405,7 +433,7 @@ def disable_option(self): disable = _CXString() conf.lib.clang_getDiagnosticOption(self, byref(disable)) - return conf.lib.clang_getCString(disable) + return str(conf.lib.clang_getCString(disable)) def format(self, options=None): """ @@ -554,8 +582,8 @@ def __init__(self, value): if value >= len(self.__class__._kinds): self.__class__._kinds += [None] * (value - len(self.__class__._kinds) + 1) if self.__class__._kinds[value] is not None: - raise ValueError,'{0} value {1} already loaded'.format( - str(self.__class__), value) + raise ValueError('{0} value {1} already loaded'.format( + str(self.__class__), value)) self.value = value self.__class__._kinds[value] = self self.__class__._name_map = None @@ -572,12 +600,12 @@ def name(self): for key, value in self.__class__.__dict__.items(): if isinstance(value, self.__class__): self._name_map[value] = key - return self._name_map[self] + return str(self._name_map[self]) @classmethod def from_id(cls, id): if id >= len(cls._kinds) or cls._kinds[id] is None: - raise ValueError,'Unknown template argument kind %d' % id + raise ValueError('Unknown template argument kind %d' % id) return cls._kinds[id] def __repr__(self): @@ -596,7 +624,7 @@ class CursorKind(BaseEnumeration): @staticmethod def get_all_kinds(): """Return all CursorKind enumeration instances.""" - return filter(None, CursorKind._kinds) + return [x for x in CursorKind._kinds if x] def is_declaration(self): """Test if this is a declaration kind.""" @@ -1427,9 +1455,9 @@ def kind(self): def spelling(self): """Return the spelling of the entity pointed at by the cursor.""" if not hasattr(self, '_spelling'): - self._spelling = conf.lib.clang_getCursorSpelling(self) + self._spelling = str(conf.lib.clang_getCursorSpelling(self)) - return self._spelling + return str(self._spelling) @property def displayname(self): @@ -1441,7 +1469,7 @@ def displayname(self): arguments of a class template specialization. 
""" if not hasattr(self, '_displayname'): - self._displayname = conf.lib.clang_getCursorDisplayName(self) + self._displayname = str(conf.lib.clang_getCursorDisplayName(self)) return self._displayname @@ -1449,7 +1477,7 @@ def displayname(self): def mangled_name(self): """Return the mangled name for the entity referenced by this cursor.""" if not hasattr(self, '_mangled_name'): - self._mangled_name = conf.lib.clang_Cursor_getMangling(self) + self._mangled_name = str(conf.lib.clang_Cursor_getMangling(self)) return self._mangled_name @@ -1588,7 +1616,7 @@ def objc_type_encoding(self): self._objc_type_encoding = \ conf.lib.clang_getDeclObjCTypeEncoding(self) - return self._objc_type_encoding + return str(self._objc_type_encoding) @property def hash(self): @@ -1635,17 +1663,23 @@ def referenced(self): @property def brief_comment(self): """Returns the brief comment text associated with that Cursor""" - return conf.lib.clang_Cursor_getBriefCommentText(self) + r = conf.lib.clang_Cursor_getBriefCommentText(self) + if not r: + return None + return str(r) @property def raw_comment(self): """Returns the raw comment text associated with that Cursor""" - return conf.lib.clang_Cursor_getRawCommentText(self) + r = conf.lib.clang_Cursor_getRawCommentText(self) + if not r: + return None + return str(r) def get_arguments(self): """Return an iterator for accessing the arguments of this cursor.""" num_args = conf.lib.clang_Cursor_getNumArguments(self) - for i in range(0, num_args): + for i in xrange(0, num_args): yield conf.lib.clang_Cursor_getArgument(self, i) def get_num_template_arguments(self): @@ -1775,7 +1809,7 @@ def __init__(self, value): if value >= len(StorageClass._kinds): StorageClass._kinds += [None] * (value - len(StorageClass._kinds) + 1) if StorageClass._kinds[value] is not None: - raise ValueError,'StorageClass already loaded' + raise ValueError('StorageClass already loaded') self.value = value StorageClass._kinds[value] = self StorageClass._name_map = None @@ -1796,7 
+1830,7 @@ def name(self): @staticmethod def from_id(id): if id >= len(StorageClass._kinds) or not StorageClass._kinds[id]: - raise ValueError,'Unknown storage class %d' % id + raise ValueError('Unknown storage class %d' % id) return StorageClass._kinds[id] def __repr__(self): @@ -1849,7 +1883,7 @@ class TypeKind(BaseEnumeration): @property def spelling(self): """Retrieve the spelling of this TypeKind.""" - return conf.lib.clang_getTypeKindSpelling(self.value) + return str(conf.lib.clang_getTypeKindSpelling(self.value)) def __repr__(self): return 'TypeKind.%s' % (self.name,) @@ -2125,7 +2159,7 @@ def get_offset(self, fieldname): """ Retrieve the offset of a field in the record. """ - return conf.lib.clang_Type_getOffsetOf(self, c_char_p(fieldname)) + return conf.lib.clang_Type_getOffsetOf(self, fieldname) def get_ref_qualifier(self): """ @@ -2152,7 +2186,7 @@ def visitor(field, children): @property def spelling(self): """Retrieve the spelling of this Type.""" - return conf.lib.clang_getTypeSpelling(self) + return str(conf.lib.clang_getTypeSpelling(self)) def __eq__(self, other): if type(other) != type(self): @@ -2184,7 +2218,7 @@ def from_param(self): class _CXUnsavedFile(Structure): """Helper for passing unsaved file arguments.""" - _fields_ = [("name", c_char_p), ("contents", c_char_p), ('length', c_ulong)] + _fields_ = [("name", c_string_p), ("contents", c_string_p), ('length', c_ulong)] # Functions calls through the python interface are rather slow. Fortunately, # for most symboles, we do not need to perform a function call. 
Their spelling @@ -2230,7 +2264,7 @@ def __init__(self, completionString, key): self.__kindNumberCache = -1 def __repr__(self): - return "{'" + self.spelling + "', " + str(self.kind) + "}" + return "{'" + str(self.spelling) + "', " + str(self.kind) + "}" @CachedProperty def spelling(self): @@ -2539,7 +2573,9 @@ def from_source(cls, filename, args=None, unsaved_files=None, options=0, args_array = None if len(args) > 0: - args_array = (c_char_p * len(args))(* args) + args_array = (c_string_p * len(args))() + for i,a in enumerate(args): + args_array[i] = c_string_p(a) unsaved_array = None if len(unsaved_files) > 0: @@ -2548,8 +2584,8 @@ def from_source(cls, filename, args=None, unsaved_files=None, options=0, if hasattr(contents, "read"): contents = contents.read() - unsaved_array[i].name = name - unsaved_array[i].contents = contents + unsaved_array[i].name = c_string_p(name) + unsaved_array[i].contents = c_string_p(contents) unsaved_array[i].length = len(contents) ptr = conf.lib.clang_parseTranslationUnit(index, filename, args_array, @@ -2604,7 +2640,7 @@ def cursor(self): @property def spelling(self): """Get the original translation unit source file name.""" - return conf.lib.clang_getTranslationUnitSpelling(self) + return str(conf.lib.clang_getTranslationUnitSpelling(self)) def get_includes(self): """ @@ -2727,9 +2763,9 @@ def reparse(self, unsaved_files=None, options=0): # FIXME: It would be great to support an efficient version # of this, one day. value = value.read() - print value + print(value) if not isinstance(value, str): - raise TypeError,'Unexpected unsaved file contents.' + raise TypeError('Unexpected unsaved file contents.') unsaved_files_array[i].name = name unsaved_files_array[i].contents = value unsaved_files_array[i].length = len(value) @@ -2791,11 +2827,11 @@ def codeComplete(self, path, line, column, unsaved_files=None, # FIXME: It would be great to support an efficient version # of this, one day. 
value = value.read() - print value + print(value) if not isinstance(value, str): - raise TypeError,'Unexpected unsaved file contents.' - unsaved_files_array[i].name = name - unsaved_files_array[i].contents = value + raise TypeError('Unexpected unsaved file contents.') + unsaved_files_array[i].name = c_string_p(name) + unsaved_files_array[i].contents = c_string_p(value) unsaved_files_array[i].length = len(value) ptr = conf.lib.clang_codeCompleteAt(self, path, line, column, unsaved_files_array, len(unsaved_files), options) @@ -2830,7 +2866,7 @@ def from_name(translation_unit, file_name): @property def name(self): """Return the complete file and path name of the file.""" - return conf.lib.clang_getCString(conf.lib.clang_getFileName(self)) + return str(conf.lib.clang_getCString(conf.lib.clang_getFileName(self))) @property def time(self): @@ -2838,7 +2874,7 @@ def time(self): return conf.lib.clang_getFileTime(self) def __str__(self): - return self.name + return str(self.name) def __repr__(self): return "" % (self.name) @@ -2907,12 +2943,12 @@ def __init__(self, cmd, ccmds): @property def directory(self): """Get the working directory for this CompileCommand""" - return conf.lib.clang_CompileCommand_getDirectory(self.cmd) + return str(conf.lib.clang_CompileCommand_getDirectory(self.cmd)) @property def filename(self): """Get the working filename for this CompileCommand""" - return conf.lib.clang_CompileCommand_getFilename(self.cmd) + return str(conf.lib.clang_CompileCommand_getFilename(self.cmd)) @property def arguments(self): @@ -2924,7 +2960,7 @@ def arguments(self): """ length = conf.lib.clang_CompileCommand_getNumArgs(self.cmd) for i in xrange(length): - yield conf.lib.clang_CompileCommand_getArg(self.cmd, i) + yield str(conf.lib.clang_CompileCommand_getArg(self.cmd, i)) class CompileCommands(object): """ @@ -3018,7 +3054,7 @@ def spelling(self): This is the textual representation of the token in source. 
""" - return conf.lib.clang_getTokenSpelling(self._tu, self) + return str(conf.lib.clang_getTokenSpelling(self._tu, self)) @property def kind(self): @@ -3061,7 +3097,7 @@ def cursor(self): [c_object_p]), ("clang_CompilationDatabase_fromDirectory", - [c_char_p, POINTER(c_uint)], + [c_string_p, POINTER(c_uint)], c_object_p, CompilationDatabase.from_result), @@ -3071,7 +3107,7 @@ def cursor(self): CompileCommands.from_result), ("clang_CompilationDatabase_getCompileCommands", - [c_object_p, c_char_p], + [c_object_p, c_string_p], c_object_p, CompileCommands.from_result), @@ -3106,7 +3142,7 @@ def cursor(self): c_uint), ("clang_codeCompleteAt", - [TranslationUnit, c_char_p, c_int, c_int, c_void_p, c_int, c_int], + [TranslationUnit, c_string_p, c_int, c_int, c_void_p, c_int, c_int], POINTER(CCRStructure)), ("clang_codeCompleteGetDiagnostic", @@ -3122,7 +3158,7 @@ def cursor(self): c_object_p), ("clang_createTranslationUnit", - [Index, c_char_p], + [Index, c_string_p], c_object_p), ("clang_CXXConstructor_isConvertingConstructor", @@ -3272,7 +3308,7 @@ def cursor(self): ("clang_getCString", [_CXString], - c_char_p), + c_string_p), ("clang_getCursor", [TranslationUnit, SourceLocation], @@ -3419,7 +3455,7 @@ def cursor(self): Type.from_result), ("clang_getFile", - [TranslationUnit, c_char_p], + [TranslationUnit, c_string_p], c_object_p), ("clang_getFileName", @@ -3548,7 +3584,7 @@ def cursor(self): ("clang_getTUResourceUsageName", [c_uint], - c_char_p), + c_string_p), ("clang_getTypeDeclaration", [Type], @@ -3643,7 +3679,7 @@ def cursor(self): bool), ("clang_parseTranslationUnit", - [Index, c_char_p, c_void_p, c_int, c_void_p, c_int, c_int], + [Index, c_string_p, c_void_p, c_int, c_void_p, c_int, c_int], c_object_p), ("clang_reparseTranslationUnit", @@ -3651,7 +3687,7 @@ def cursor(self): c_int), ("clang_saveTranslationUnit", - [TranslationUnit, c_char_p, c_uint], + [TranslationUnit, c_string_p, c_uint], c_int), ("clang_tokenize", @@ -3723,7 +3759,7 @@ def cursor(self): 
Type.from_result), ("clang_Type_getOffsetOf", - [Type, c_char_p], + [Type, c_string_p], c_longlong), ("clang_Type_getSizeOf", @@ -3782,7 +3818,8 @@ def register_functions(lib, ignore_errors): def register(item): return register_function(lib, item, ignore_errors) - map(register, functionList) + for f in functionList: + register(f) class Config: library_path = None diff --git a/bindings/python/tests/cindex/test_translation_unit.py b/bindings/python/tests/cindex/test_translation_unit.py index be6cd671ae0..f959cf4aafc 100644 --- a/bindings/python/tests/cindex/test_translation_unit.py +++ b/bindings/python/tests/cindex/test_translation_unit.py @@ -59,9 +59,13 @@ def test_unsaved_files(): assert spellings[-1] == 'y' def test_unsaved_files_2(): - import StringIO + try: + from StringIO import StringIO + except: + from io import StringIO + tu = TranslationUnit.from_source('fake.c', unsaved_files = [ - ('fake.c', StringIO.StringIO('int x;'))]) + ('fake.c', StringIO('int x;'))]) spellings = [c.spelling for c in tu.cursor.get_children()] assert spellings[-1] == 'x'