diff --git a/Tools/c-analyzer/TODO b/Tools/c-analyzer/TODO index 1fd8052268be07..4b9b2857e1d1e7 100644 --- a/Tools/c-analyzer/TODO +++ b/Tools/c-analyzer/TODO @@ -1,3 +1,11 @@ +# For up-to-date results, run: +# ./python Tools/c-analyzer/c-analyzer.py check --format summary +# or +# ./python Tools/c-analyzer/c-analyzer.py analyze + + +####################################### +# non-PyObject (61) # allocator (16) Objects/obmalloc.c:_PyMem static PyMemAllocatorEx _PyMem @@ -32,12 +40,7 @@ Objects/dictobject.c:empty_keys_struct static PyDictKe Python/fileutils.c:_Py_open_cloexec_works int _Py_open_cloexec_works -# freelists -Objects/dictobject.c:keys_free_list static PyDictKeysObject *keys_free_list[PyDict_MAXFREELIST] -Objects/dictobject.c:numfreekeys static int numfreekeys - - -# other non-object (43) +# other non-object (40) Modules/_tracemalloc.c:allocators static struct { PyMemAllocatorEx mem; PyMemAllocatorEx raw; PyMemAllocatorEx obj; } allocators Modules/_tracemalloc.c:tables_lock static PyThread_type_lock tables_lock Modules/_tracemalloc.c:tracemalloc_filenames static _Py_hashtable_t *tracemalloc_filenames @@ -81,30 +84,7 @@ Python/pylifecycle.c:fatal_error():reentrant static int reen ####################################### -# PyObject (960) - -# freelists (10 + 10) -Modules/_collectionsmodule.c:freeblocks static block *freeblocks[MAXFREEBLOCKS] -Modules/_collectionsmodule.c:numfreeblocks static Py_ssize_t numfreeblocks -Objects/dictobject.c:free_list static PyDictObject *free_list[PyDict_MAXFREELIST] -Objects/dictobject.c:numfree static int numfree -Objects/exceptions.c:memerrors_freelist static PyBaseExceptionObject *memerrors_freelist -Objects/exceptions.c:memerrors_numfree static int memerrors_numfree -Objects/floatobject.c:free_list static PyFloatObject *free_list -Objects/floatobject.c:numfree static int numfree -Objects/frameobject.c:free_list static PyFrameObject *free_list -Objects/frameobject.c:numfree static int numfree -Objects/genobject.c:ag_asend_freelist static PyAsyncGenASend *ag_asend_freelist[_PyAsyncGen_MAXFREELIST] -Objects/genobject.c:ag_asend_freelist_free static int ag_asend_freelist_free -Objects/genobject.c:ag_value_freelist static _PyAsyncGenWrappedValue *ag_value_freelist[_PyAsyncGen_MAXFREELIST] -Objects/genobject.c:ag_value_freelist_free static int ag_value_freelist_free -Objects/listobject.c:free_list static PyListObject *free_list[PyList_MAXFREELIST] -Objects/listobject.c:numfree static int numfree -Objects/tupleobject.c:free_list static PyTupleObject *free_list[PyTuple_MAXSAVESIZE] -Objects/tupleobject.c:numfree static int numfree[PyTuple_MAXSAVESIZE] -Python/context.c:ctx_freelist static PyContext *ctx_freelist -Python/context.c:ctx_freelist_len static int ctx_freelist_len - +# PyObject (919) # singletons (7) Objects/boolobject.c:_Py_FalseStruct static struct _longobject _Py_FalseStruct @@ -116,16 +96,8 @@ Objects/object.c:_Py_NotImplementedStruct PyObject _Py_No Objects/sliceobject.c:_Py_EllipsisObject PyObject _Py_EllipsisObject -# module vars (9) -Modules/_functoolsmodule.c:kwd_mark static PyObject *kwd_mark -Modules/_localemodule.c:Error static PyObject *Error -Modules/_threadmodule.c:ThreadError static PyObject *ThreadError +# module vars (1) Modules/_tracemalloc.c:unknown_filename static PyObject *unknown_filename -Modules/signalmodule.c:DefaultHandler static PyObject *DefaultHandler -Modules/signalmodule.c:IgnoreHandler static PyObject *IgnoreHandler -Modules/signalmodule.c:IntHandler static PyObject *IntHandler -Modules/signalmodule.c:ItimerError static PyObject *ItimerError -Objects/exceptions.c:errnomap static PyObject *errnomap # other (non-cache) (5) @@ -136,26 +108,15 @@ Modules/signalmodule.c:Handlers static volatile Objects/setobject.c:_dummy_struct static PyObject _dummy_struct -# caches (5) -Modules/posixmodule.c:posix_putenv_garbage static PyObject *posix_putenv_garbage -Objects/sliceobject.c:slice_cache static PySliceObject *slice_cache -Objects/typeobject.c:method_cache static struct method_cache_entry method_cache[1 << MCACHE_SIZE_EXP] -Objects/unicodeobject.c:interned static PyObject *interned +# caches (1) Python/import.c:extensions static PyObject *extensions -# cached constants - non-str (15) +# cached constants - non-str (6) Modules/_io/_iomodule.c:_PyIO_empty_bytes PyObject *_PyIO_empty_bytes Modules/_io/bufferedio.c:_PyIO_trap_eintr():eintr_int static PyObject *eintr_int -Modules/posixmodule.c:billion static PyObject *billion -Modules/posixmodule.c:wait_helper():struct_rusage static PyObject *struct_rusage -Objects/bytesobject.c:characters static PyBytesObject *characters[UCHAR_MAX + 1] -Objects/bytesobject.c:nullstring static PyBytesObject *nullstring -Objects/codeobject.c:PyCode_NewEmpty():nulltuple static PyObject *nulltuple -Objects/dictobject.c:empty_values static PyObject *empty_values[1] +Objects/dictobject.c:empty_values_struct static PyDictValues Objects/listobject.c:indexerr static PyObject *indexerr -Objects/longobject.c:small_ints static PyLongObject small_ints[NSMALLNEGINTS + NSMALLPOSINTS] -Objects/setobject.c:emptyfrozenset static PyObject *emptyfrozenset Python/context.c:_token_missing static PyObject *_token_missing Python/hamt.c:_empty_hamt static PyHamtObject *_empty_hamt @@ -662,15 +623,6 @@ Modules/itertoolsmodule.c:takewhile_type static PyTypeOb Modules/itertoolsmodule.c:tee_type static PyTypeObject tee_type Modules/itertoolsmodule.c:teedataobject_type static PyTypeObject teedataobject_type Modules/itertoolsmodule.c:ziplongest_type static PyTypeObject ziplongest_type -Modules/posixmodule.c:DirEntryType static PyTypeObject DirEntryType -Modules/posixmodule.c:ScandirIteratorType static PyTypeObject ScandirIteratorType -Modules/posixmodule.c:SchedParamType static PyTypeObject* SchedParamType -Modules/posixmodule.c:StatResultType static PyTypeObject* StatResultType -Modules/posixmodule.c:StatVFSResultType static PyTypeObject* StatVFSResultType -Modules/posixmodule.c:TerminalSizeType static PyTypeObject* TerminalSizeType -Modules/posixmodule.c:TimesResultType static PyTypeObject* TimesResultType -Modules/posixmodule.c:UnameResultType static PyTypeObject* UnameResultType -Modules/posixmodule.c:WaitidResultType static PyTypeObject* WaitidResultType Modules/signalmodule.c:SiginfoType static PyTypeObject SiginfoType Modules/timemodule.c:StructTimeType static PyTypeObject StructTimeType Modules/xxsubtype.c:spamdict_type static PyTypeObject spamdict_type diff --git a/Tools/c-analyzer/c_parser/__init__.py b/Tools/c-analyzer/c_parser/__init__.py index 39455ddbf1a0c3..fc10aff94505da 100644 --- a/Tools/c-analyzer/c_parser/__init__.py +++ b/Tools/c-analyzer/c_parser/__init__.py @@ -1,3 +1,4 @@ +from c_common.fsutil import match_glob as _match_glob from .parser import parse as _parse from .preprocessor import get_preprocessor as _get_preprocessor @@ -5,23 +6,32 @@ def parse_file(filename, *, match_kind=None, get_file_preprocessor=None, + file_maxsizes=None, ): if get_file_preprocessor is None: get_file_preprocessor = _get_preprocessor() - yield from _parse_file(filename, match_kind, get_file_preprocessor) + yield from _parse_file( + filename, match_kind, get_file_preprocessor, file_maxsizes) def parse_files(filenames, *, match_kind=None, get_file_preprocessor=None, + file_maxsizes=None, ): if get_file_preprocessor is None: get_file_preprocessor = _get_preprocessor() for filename in filenames: - yield from _parse_file(filename, match_kind, get_file_preprocessor) + yield from _parse_file( + filename, match_kind, get_file_preprocessor, file_maxsizes) -def _parse_file(filename, match_kind, get_file_preprocessor): +def _parse_file(filename, match_kind, get_file_preprocessor, maxsizes): + srckwargs = {} + maxsize = _resolve_max_size(filename, maxsizes) + if maxsize: + srckwargs['maxtext'], srckwargs['maxlines'] = maxsize + # Preprocess the file. preprocess = get_file_preprocessor(filename) preprocessed = preprocess() @@ -30,7 +40,7 @@ def _parse_file(filename, match_kind, get_file_preprocessor): # Parse the lines. srclines = ((l.file, l.data) for l in preprocessed if l.kind == 'source') - for item in _parse(srclines): + for item in _parse(srclines, **srckwargs): if match_kind is not None and not match_kind(item.kind): continue if not item.filename: @@ -38,6 +48,22 @@ def _parse_file(filename, match_kind, get_file_preprocessor): yield item +def _resolve_max_size(filename, maxsizes): + for pattern, maxsize in (maxsizes.items() if maxsizes else ()): + if _match_glob(filename, pattern): + break + else: + return None + if not maxsize: + return None, None + maxtext, maxlines = maxsize + if maxtext is not None: + maxtext = int(maxtext) + if maxlines is not None: + maxlines = int(maxlines) + return maxtext, maxlines + + def parse_signature(text): raise NotImplementedError diff --git a/Tools/c-analyzer/c_parser/parser/__init__.py b/Tools/c-analyzer/c_parser/parser/__init__.py index df70aae66b7760..b5eae2ed92d0da 100644 --- a/Tools/c-analyzer/c_parser/parser/__init__.py +++ b/Tools/c-analyzer/c_parser/parser/__init__.py @@ -120,12 +120,12 @@ from ._info import SourceInfo -def parse(srclines): +def parse(srclines, **srckwargs): if isinstance(srclines, str): # a filename raise NotImplementedError anon_name = anonymous_names() - for result in _parse(srclines, anon_name): + for result in _parse(srclines, anon_name, **srckwargs): yield ParsedItem.from_raw(result) @@ -152,17 +152,19 @@ def anon_name(prefix='anon-'): _logger = logging.getLogger(__name__) -def _parse(srclines, anon_name): +def _parse(srclines, anon_name, **srckwargs): from ._global import parse_globals - source = _iter_source(srclines) - #source = _iter_source(srclines, showtext=True) + source = _iter_source(srclines, **srckwargs) for result in parse_globals(source, anon_name): # XXX Handle blocks here instead of in parse_globals(). yield result -def _iter_source(lines, *, maxtext=20_000, maxlines=700, showtext=False): +# We use defaults that cover most files. Files with bigger declarations +# are covered elsewhere (MAX_SIZES in cpython/_parser.py). + +def _iter_source(lines, *, maxtext=10_000, maxlines=200, showtext=False): maxtext = maxtext if maxtext and maxtext > 0 else None maxlines = maxlines if maxlines and maxlines > 0 else None filestack = [] diff --git a/Tools/c-analyzer/c_parser/preprocessor/__main__.py b/Tools/c-analyzer/c_parser/preprocessor/__main__.py index bfc61949a76e4e..55aa8752dce724 100644 --- a/Tools/c-analyzer/c_parser/preprocessor/__main__.py +++ b/Tools/c-analyzer/c_parser/preprocessor/__main__.py @@ -43,7 +43,7 @@ def add_common_cli(parser, *, get_preprocessor=_get_preprocessor): def process_args(args, *, argv): ns = vars(args) - process_fail_arg(args, argv) + process_fail_arg(args, argv=argv) ignore_exc = ns.pop('ignore_exc') # We later pass ignore_exc to _get_preprocessor(). diff --git a/Tools/c-analyzer/cpython/__main__.py b/Tools/c-analyzer/cpython/__main__.py index 06ec871ba75e35..be331d50427d53 100644 --- a/Tools/c-analyzer/cpython/__main__.py +++ b/Tools/c-analyzer/cpython/__main__.py @@ -111,6 +111,7 @@ def cmd_parse(filenames=None, **kwargs): c_parser.cmd_parse( filenames, relroot=REPO_ROOT, + file_maxsizes=_parser.MAX_SIZES, **kwargs ) @@ -127,6 +128,7 @@ def cmd_check(filenames=None, **kwargs): relroot=REPO_ROOT, _analyze=_analyzer.analyze, _CHECKS=CHECKS, + file_maxsizes=_parser.MAX_SIZES, **kwargs ) @@ -141,6 +143,7 @@ def cmd_analyze(filenames=None, **kwargs): relroot=REPO_ROOT, _analyze=_analyzer.analyze, formats=formats, + file_maxsizes=_parser.MAX_SIZES, **kwargs ) diff --git a/Tools/c-analyzer/cpython/_parser.py b/Tools/c-analyzer/cpython/_parser.py index 8526b2af15a235..90b470c8196c05 100644 --- a/Tools/c-analyzer/cpython/_parser.py +++ b/Tools/c-analyzer/cpython/_parser.py @@ -14,6 +14,10 @@ GLOB_ALL = '**/*' +def _abs(relfile): + return os.path.join(REPO_ROOT, relfile) + + def clean_lines(text): """Clear out comments, blank lines, and leading/trailing whitespace.""" lines = (line.strip() for line in text.splitlines()) @@ -22,7 +26,7 @@ def clean_lines(text): if line and not line.startswith('#')) glob_all = f'{GLOB_ALL} ' lines = (re.sub(r'^[*] ', glob_all, line) for line in lines) - lines = (os.path.join(REPO_ROOT, line) for line in lines) + lines = (_abs(line) for line in lines) return list(lines) @@ -55,26 +59,31 @@ def clean_lines(text): # Windows Modules/_winapi.c # windows.h +Modules/expat/winconfig.h Modules/overlapped.c # winsock.h Python/dynload_win.c # windows.h -Modules/expat/winconfig.h Python/thread_nt.h # other OS-dependent +Python/dynload_aix.c # sys/ldr.h Python/dynload_dl.c # dl.h Python/dynload_hpux.c # dl.h -Python/dynload_aix.c # sys/ldr.h Python/thread_pthread.h # only huge constants (safe but parsing is slow) +Modules/_blake2/impl/blake2-kat.h Modules/_ssl_data.h +Modules/_ssl_data_300.h +Modules/_ssl_data_111.h +Modules/cjkcodecs/mappings_*.h Modules/unicodedata_db.h Modules/unicodename_db.h -Modules/cjkcodecs/mappings_*.h Objects/unicodetype_db.h -Python/importlib.h -Python/importlib_external.h -Python/importlib_zipimport.h + +# generated +Python/frozen_modules/*.h +Python/opcode_targets.h +Python/stdlib_module_names.h # @end=conf@ ''') @@ -126,35 +135,40 @@ def clean_lines(text): Parser/**/*.c Py_BUILD_CORE 1 Objects/**/*.c Py_BUILD_CORE 1 -Modules/faulthandler.c Py_BUILD_CORE 1 +Modules/_asynciomodule.c Py_BUILD_CORE 1 +Modules/_collectionsmodule.c Py_BUILD_CORE 1 +Modules/_ctypes/_ctypes.c Py_BUILD_CORE 1 +Modules/_ctypes/cfield.c Py_BUILD_CORE 1 +Modules/_cursesmodule.c Py_BUILD_CORE 1 +Modules/_datetimemodule.c Py_BUILD_CORE 1 Modules/_functoolsmodule.c Py_BUILD_CORE 1 -Modules/gcmodule.c Py_BUILD_CORE 1 -Modules/getpath.c Py_BUILD_CORE 1 +Modules/_heapqmodule.c Py_BUILD_CORE 1 Modules/_io/*.c Py_BUILD_CORE 1 -Modules/itertoolsmodule.c Py_BUILD_CORE 1 Modules/_localemodule.c Py_BUILD_CORE 1 -Modules/main.c Py_BUILD_CORE 1 -Modules/posixmodule.c Py_BUILD_CORE 1 -Modules/signalmodule.c Py_BUILD_CORE 1 +Modules/_operator.c Py_BUILD_CORE 1 +Modules/_posixsubprocess.c Py_BUILD_CORE 1 +Modules/_sre.c Py_BUILD_CORE 1 Modules/_threadmodule.c Py_BUILD_CORE 1 Modules/_tracemalloc.c Py_BUILD_CORE 1 -Modules/_asynciomodule.c Py_BUILD_CORE 1 -Modules/mathmodule.c Py_BUILD_CORE 1 -Modules/cmathmodule.c Py_BUILD_CORE 1 Modules/_weakref.c Py_BUILD_CORE 1 +Modules/_zoneinfo.c Py_BUILD_CORE 1 +Modules/atexitmodule.c Py_BUILD_CORE 1 +Modules/cmathmodule.c Py_BUILD_CORE 1 +Modules/faulthandler.c Py_BUILD_CORE 1 +Modules/gcmodule.c Py_BUILD_CORE 1 +Modules/getpath.c Py_BUILD_CORE 1 +Modules/itertoolsmodule.c Py_BUILD_CORE 1 +Modules/main.c Py_BUILD_CORE 1 +Modules/mathmodule.c Py_BUILD_CORE 1 +Modules/posixmodule.c Py_BUILD_CORE 1 Modules/sha256module.c Py_BUILD_CORE 1 Modules/sha512module.c Py_BUILD_CORE 1 -Modules/_datetimemodule.c Py_BUILD_CORE 1 -Modules/_ctypes/cfield.c Py_BUILD_CORE 1 -Modules/_heapqmodule.c Py_BUILD_CORE 1 -Modules/_posixsubprocess.c Py_BUILD_CORE 1 -Modules/_sre.c Py_BUILD_CORE 1 -Modules/_collectionsmodule.c Py_BUILD_CORE 1 -Modules/_zoneinfo.c Py_BUILD_CORE 1 +Modules/signalmodule.c Py_BUILD_CORE 1 +Modules/symtablemodule.c Py_BUILD_CORE 1 +Modules/timemodule.c Py_BUILD_CORE 1 Modules/unicodedata.c Py_BUILD_CORE 1 -Modules/_cursesmodule.c Py_BUILD_CORE 1 -Modules/_ctypes/_ctypes.c Py_BUILD_CORE 1 Objects/stringlib/codecs.h Py_BUILD_CORE 1 +Objects/stringlib/unicode_format.h Py_BUILD_CORE 1 Python/ceval_gil.h Py_BUILD_CORE 1 Python/condvar.h Py_BUILD_CORE 1 @@ -244,6 +258,7 @@ def clean_lines(text): Modules/sre_lib.h LOCAL(type) static inline type Modules/sre_lib.h SRE(F) sre_ucs2_##F Objects/stringlib/codecs.h STRINGLIB_IS_UNICODE 1 +Include/internal/pycore_bitutils.h _Py__has_builtin(B) 0 # @end=tsv@ ''')[1:] @@ -264,6 +279,18 @@ def clean_lines(text): './Include/cpython/', ] +MAX_SIZES = { + _abs('Include/**/*.h'): (5_000, 500), + _abs('Modules/_ctypes/ctypes.h'): (5_000, 500), + _abs('Modules/_datetimemodule.c'): (20_000, 300), + _abs('Modules/posixmodule.c'): (20_000, 500), + _abs('Modules/termios.c'): (10_000, 800), + _abs('Modules/_testcapimodule.c'): (20_000, 400), + _abs('Modules/expat/expat.h'): (10_000, 400), + _abs('Objects/stringlib/unicode_format.h'): (10_000, 400), + _abs('Objects/typeobject.c'): (20_000, 200), +} + def get_preprocessor(*, file_macros=None, @@ -298,6 +325,7 @@ def parse_file(filename, *, filename, match_kind=match_kind, get_file_preprocessor=get_file_preprocessor, + file_maxsizes=MAX_SIZES, ) @@ -317,5 +345,6 @@ def parse_files(filenames=None, *, filenames, match_kind=match_kind, get_file_preprocessor=get_file_preprocessor, + file_maxsizes=MAX_SIZES, **file_kwargs )