import os.path
import re

from c_analyzer.common.info import ID
from c_analyzer.common.util import read_tsv, write_tsv

from . import DATA_DIR

# XXX need tests:
# * generate / script


IGNORED_FILE = os.path.join(DATA_DIR, 'ignored.tsv')

IGNORED_COLUMNS = ('filename', 'funcname', 'name', 'kind', 'reason')
IGNORED_HEADER = '\t'.join(IGNORED_COLUMNS)

# Maps the name of a file-level global (one with no enclosing function)
# to the reason it is ignored.  Consulted by _is_ignored().
# XXX Move these to ignored.tsv.
IGNORED = {
    # global
    'PyImport_FrozenModules': 'process-global',
    'M___hello__': 'process-global',
    'inittab_copy': 'process-global',
    'PyHash_Func': 'process-global',
    '_Py_HashSecret_Initialized': 'process-global',
    '_TARGET_LOCALES': 'process-global',

    # startup (only changed before/during)
    '_PyRuntime': 'runtime startup',
    'runtime_initialized': 'runtime startup',
    'static_arg_parsers': 'runtime startup',
    'orig_argv': 'runtime startup',
    'opt_ptr': 'runtime startup',
    '_preinit_warnoptions': 'runtime startup',
    '_Py_StandardStreamEncoding': 'runtime startup',
    'Py_FileSystemDefaultEncoding': 'runtime startup',
    '_Py_StandardStreamErrors': 'runtime startup',
    'Py_FileSystemDefaultEncodeErrors': 'runtime startup',
    'Py_BytesWarningFlag': 'runtime startup',
    'Py_DebugFlag': 'runtime startup',
    'Py_DontWriteBytecodeFlag': 'runtime startup',
    'Py_FrozenFlag': 'runtime startup',
    'Py_HashRandomizationFlag': 'runtime startup',
    'Py_IgnoreEnvironmentFlag': 'runtime startup',
    'Py_InspectFlag': 'runtime startup',
    'Py_InteractiveFlag': 'runtime startup',
    'Py_IsolatedFlag': 'runtime startup',
    'Py_NoSiteFlag': 'runtime startup',
    'Py_NoUserSiteDirectory': 'runtime startup',
    'Py_OptimizeFlag': 'runtime startup',
    'Py_QuietFlag': 'runtime startup',
    'Py_UTF8Mode': 'runtime startup',
    'Py_UnbufferedStdioFlag': 'runtime startup',
    'Py_VerboseFlag': 'runtime startup',
    '_Py_path_config': 'runtime startup',
    '_PyOS_optarg': 'runtime startup',
    '_PyOS_opterr': 'runtime startup',
    '_PyOS_optind': 'runtime startup',
    '_Py_HashSecret': 'runtime startup',

    # REPL
    '_PyOS_ReadlineLock': 'repl',
    '_PyOS_ReadlineTState': 'repl',

    # effectively const
    'tracemalloc_empty_traceback': 'const',
    '_empty_bitmap_node': 'const',
    'posix_constants_pathconf': 'const',
    'posix_constants_confstr': 'const',
    'posix_constants_sysconf': 'const',
    '_PySys_ImplCacheTag': 'const',
    '_PySys_ImplName': 'const',
    'PyImport_Inittab': 'const',
    '_PyImport_DynLoadFiletab': 'const',
    '_PyParser_Grammar': 'const',
    'Py_hexdigits': 'const',
    '_PyImport_Inittab': 'const',
    '_PyByteArray_empty_string': 'const',
    '_PyLong_DigitValue': 'const',
    '_Py_SwappedOp': 'const',
    'PyStructSequence_UnnamedField': 'const',

    # signals are main-thread only
    'faulthandler_handlers': 'signals are main-thread only',
    'user_signals': 'signals are main-thread only',
    'wakeup': 'signals are main-thread only',

    # hacks
    '_PySet_Dummy': 'only used as a placeholder',
}

# Shared reason string for globals whose data races are tolerated.
BENIGN = 'races here are benign and unlikely'


def is_supported(variable, ignored=None, known=None, *,
                 _ignored=(lambda *a, **k: _is_ignored(*a, **k)),
                 _vartype_okay=(lambda *a, **k: _is_vartype_okay(*a, **k)),
                 ):
    """Return True if the given global variable is okay in CPython.

    "variable" must provide the attributes read by the checks
    ("id", "name", "funcname", "filename" and "vartype").

    "ignored" is an optional mapping with optional "variables" and
    "types" entries (the shape returned by ignored_from_file()).
    It may be None, in which case only the built-in rules apply.

    "known" is accepted for interface compatibility; it is not used here.
    """
    if _ignored(variable, ignored and ignored.get('variables')):
        return True
    # Guard against ignored=None exactly like the lookup above does
    # (and like _get_rows() does); an unguarded ignored.get() would
    # raise AttributeError for the default argument.
    elif _vartype_okay(variable.vartype, ignored and ignored.get('types')):
        return True
    else:
        return False


def _is_ignored(variable, ignoredvars=None, *,
                _IGNORED=IGNORED,
                ):
    """Return the reason if the variable is a supported global.

    Return None if the variable is not a supported global.

    "ignoredvars", if given, maps variable IDs to reasons and is
    consulted first.  "_IGNORED" maps names of file-level globals
    (those with no funcname) to reasons.  After that a series of
    hard-coded per-file rules is applied.
    """
    if ignoredvars and (reason := ignoredvars.get(variable.id)):
        return reason

    if variable.funcname is None:
        if reason := _IGNORED.get(variable.name):
            return reason

    # compiler
    if variable.filename == 'Python/graminit.c':
        if variable.vartype.startswith('static state '):
            return 'compiler'
    if variable.filename == 'Python/symtable.c':
        if variable.vartype.startswith('static identifier '):
            return 'compiler'
    if variable.filename == 'Python/Python-ast.c':
        # These should be const.
        if variable.name.endswith('_field'):
            return 'compiler'
        if variable.name.endswith('_attribute'):
            return 'compiler'

    # other
    if variable.filename == 'Python/dtoa.c':
        # guarded by lock?
        if variable.name in ('p5s', 'freelist'):
            return 'dtoa is thread-safe?'
        if variable.name in ('private_mem', 'pmem_next'):
            return 'dtoa is thread-safe?'
    if variable.filename == 'Python/thread.c':
        # Threads do not become an issue until after these have been set
        # and these never get changed after that.
        if variable.name in ('initialized', 'thread_debug'):
            return 'thread-safe'
    if variable.filename == 'Python/getversion.c':
        if variable.name == 'version':
            # Races are benign here, as well as unlikely.
            return BENIGN
    if variable.filename == 'Python/fileutils.c':
        if variable.name == 'force_ascii':
            return BENIGN
        if variable.name == 'ioctl_works':
            return BENIGN
        if variable.name == '_Py_open_cloexec_works':
            return BENIGN
    if variable.filename == 'Python/codecs.c':
        if variable.name == 'ucnhash_CAPI':
            return BENIGN
    if variable.filename == 'Python/bootstrap_hash.c':
        if variable.name == 'getrandom_works':
            return BENIGN
    if variable.filename == 'Objects/unicodeobject.c':
        if variable.name == 'ucnhash_CAPI':
            return BENIGN
        if variable.name == 'bloom_linebreak':
            # *mostly* benign
            return BENIGN
    if variable.filename == 'Modules/getbuildinfo.c':
        if variable.name == 'buildinfo':
            # The static is used for pre-allocation.
            return BENIGN
    if variable.filename == 'Modules/posixmodule.c':
        if variable.name == 'ticks_per_second':
            return BENIGN
        if variable.name == 'dup3_works':
            return BENIGN
    if variable.filename == 'Modules/timemodule.c':
        if variable.name == 'ticks_per_second':
            return BENIGN
    if variable.filename == 'Objects/longobject.c':
        if variable.name == 'log_base_BASE':
            return BENIGN
        if variable.name == 'convwidth_base':
            return BENIGN
        if variable.name == 'convmultmax_base':
            return BENIGN

    return None


def _is_vartype_okay(vartype, ignoredtypes=None):
    """Return the reason if the C type string is acceptable, else None.

    Anything recognized by _is_object() is rejected (None).
    "ignoredtypes" is accepted but not consulted yet.
    """
    if _is_object(vartype):
        return None

    if vartype.startswith('static const '):
        return 'const'
    if vartype.startswith('const '):
        return 'const'

    # components for TypeObject definitions
    for name in ('PyMethodDef', 'PyGetSetDef', 'PyMemberDef'):
        if name in vartype:
            return 'const'
    for name in ('PyNumberMethods', 'PySequenceMethods', 'PyMappingMethods',
                 'PyBufferProcs', 'PyAsyncMethods'):
        if name in vartype:
            return 'const'
    for name in ('slotdef', 'newfunc'):
        if name in vartype:
            return 'const'

    # structseq
    for name in ('PyStructSequence_Desc', 'PyStructSequence_Field'):
        if name in vartype:
            return 'const'

    # other definitions
    if 'PyModuleDef' in vartype:
        return 'const'

    # thread-safe
    if '_Py_atomic_int' in vartype:
        return 'thread-safe'
    if 'pthread_condattr_t' in vartype:
        return 'thread-safe'

    # startup
    if '_Py_PreInitEntry' in vartype:
        return 'startup'

    # global
#    if 'PyMemAllocatorEx' in vartype:
#        return True

    # others
#    if 'PyThread_type_lock' in vartype:
#        return True

    # XXX ???
    # _Py_tss_t
    # _Py_hashtable_t
    # stack_t
    # _PyUnicode_Name_CAPI

    # functions
    if '(' in vartype and '[' not in vartype:
        return 'function pointer'

    # XXX finish!
    # * allow const values?
    #raise NotImplementedError
    return None


# NOTE: this is a re.VERBOSE pattern, so whitespace is ignored and each
# "#" comment runs to the end of its line; it must stay multi-line.
PYOBJECT_RE = re.compile(r'''
        ^
        (
            # must start with "static "
            static \s+
            (
                identifier
            )
            \b
        ) |
        (
            # may start with "static "
            ( static \s+ )?
            (
                .*
                (
                    PyObject |
                    PyTypeObject |
                    _? Py \w+ Object |
                    _PyArg_Parser |
                    _Py_Identifier |
                    traceback_t |
                    PyAsyncGenASend |
                    _PyAsyncGenWrappedValue |
                    PyContext |
                    method_cache_entry
                )
                \b
            ) |
            (
                (
                    _Py_IDENTIFIER |
                    _Py_static_string
                )
                [(]
            )
        )
        ''', re.VERBOSE)


def _is_object(vartype):
    """Return True if the C type string looks like a Python object type."""
    # Checked first so that it is excluded before PYOBJECT_RE is applied.
    if 'PyDictKeysObject' in vartype:
        return False
    if PYOBJECT_RE.match(vartype):
        return True
    if vartype.endswith((' _Py_FalseStruct', ' _Py_TrueStruct')):
        return True

    # XXX Add more?

    #for part in vartype.split():
    #    # XXX const is automatic True?
    #    if part == 'PyObject' or part.startswith('PyObject['):
    #        return True
    return False


def ignored_from_file(infile, *,
                      _read_tsv=read_tsv,
                      ):
    """Return the ignored vars found in the given TSV file.

    The result is a dict with a single "variables" entry that maps
    each variable's ID to the reason it is ignored.  A funcname that
    is empty or "-" is treated as None.

    Raise ValueError for any row whose kind is not "variable".
    """
    ignored = {
        'variables': {},
        #'types': {},
        #'constants': {},
        #'macros': {},
    }
    for row in _read_tsv(infile, IGNORED_HEADER):
        filename, funcname, name, kind, reason = row
        if not funcname or funcname == '-':
            funcname = None
        varid = ID(filename, funcname, name)
        if kind == 'variable':
            values = ignored['variables']
        else:
            raise ValueError(f'unsupported kind in row {row}')
        values[varid] = reason
    return ignored


##################################
# generate

def _get_row(varid, reason):
    """Return the TSV row (matching IGNORED_COLUMNS) for the variable ID."""
    return (
        varid.filename,
        varid.funcname or '-',
        varid.name,
        'variable',
        str(reason),
    )


def _get_rows(variables, ignored=None, *,
              _as_row=_get_row,
              _is_ignored=_is_ignored,
              _vartype_okay=_is_vartype_okay,
              ):
    """Yield a TSV row for each variable that has a reason to be ignored.

    Each reported variable is also printed, followed by a final total.
    """
    count = 0
    for variable in variables:
        reason = _is_ignored(variable,
                             ignored and ignored.get('variables'),
                             )
        if not reason:
            reason = _vartype_okay(variable.vartype,
                                   ignored and ignored.get('types'))
        if not reason:
            continue

        print(' ', variable, repr(reason))
        yield _as_row(variable.id, reason)
        count += 1
    print(f'total: {count}')


def _generate_ignored_file(variables, filename=None, *,
                           _generate_rows=_get_rows,
                           _write_tsv=write_tsv,
                           ):
    """Write the ignored-variables TSV file for the given variables.

    If no filename is given then IGNORED_FILE + '.new' is used.
    """
    if not filename:
        filename = IGNORED_FILE + '.new'
    rows = _generate_rows(variables)
    _write_tsv(filename, IGNORED_HEADER, rows)


if __name__ == '__main__':
    from cpython import SOURCE_DIRS
    from cpython.known import (
        from_file as known_from_file,
        DATA_FILE as KNOWN_FILE,
    )
    # XXX This is wrong!
    from . import find
    known = known_from_file(KNOWN_FILE)
    knownvars = (known or {}).get('variables')
    variables = find.globals_from_binary(knownvars=knownvars,
                                         dirnames=SOURCE_DIRS)

    _generate_ignored_file(variables)