You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
399 lines
12 KiB
399 lines
12 KiB
import os.path
|
|
import re
|
|
|
|
from c_analyzer.common.info import ID
|
|
from c_analyzer.common.util import read_tsv, write_tsv
|
|
|
|
from . import DATA_DIR
|
|
|
|
# XXX need tests:
|
|
# * generate / script
|
|
|
|
|
|
# Location of the "ignored" TSV data file inside DATA_DIR.
IGNORED_FILE = os.path.join(DATA_DIR, 'ignored.tsv')

# Column names of a row in the ignored TSV, in the order rows are
# unpacked by ignored_from_file() and produced by _get_row().
IGNORED_COLUMNS = ('filename', 'funcname', 'name', 'kind', 'reason')
# The tab-joined header passed to read_tsv() / write_tsv().
IGNORED_HEADER = '\t'.join(IGNORED_COLUMNS)
|
|
|
|
# Built-in table mapping a global variable's name to the reason it is
# ignored.  _is_ignored() only consults it for variables whose
# "funcname" is None.
# XXX Move these to ignored.tsv.
IGNORED = {
    # global
    'PyImport_FrozenModules': 'process-global',
    'M___hello__': 'process-global',
    'inittab_copy': 'process-global',
    'PyHash_Func': 'process-global',
    '_Py_HashSecret_Initialized': 'process-global',
    '_TARGET_LOCALES': 'process-global',

    # startup (only changed before/during)
    '_PyRuntime': 'runtime startup',
    'runtime_initialized': 'runtime startup',
    'static_arg_parsers': 'runtime startup',
    'orig_argv': 'runtime startup',
    'opt_ptr': 'runtime startup',
    '_preinit_warnoptions': 'runtime startup',
    '_Py_StandardStreamEncoding': 'runtime startup',
    'Py_FileSystemDefaultEncoding': 'runtime startup',
    '_Py_StandardStreamErrors': 'runtime startup',
    'Py_FileSystemDefaultEncodeErrors': 'runtime startup',
    'Py_BytesWarningFlag': 'runtime startup',
    'Py_DebugFlag': 'runtime startup',
    'Py_DontWriteBytecodeFlag': 'runtime startup',
    'Py_FrozenFlag': 'runtime startup',
    'Py_HashRandomizationFlag': 'runtime startup',
    'Py_IgnoreEnvironmentFlag': 'runtime startup',
    'Py_InspectFlag': 'runtime startup',
    'Py_InteractiveFlag': 'runtime startup',
    'Py_IsolatedFlag': 'runtime startup',
    'Py_NoSiteFlag': 'runtime startup',
    'Py_NoUserSiteDirectory': 'runtime startup',
    'Py_OptimizeFlag': 'runtime startup',
    'Py_QuietFlag': 'runtime startup',
    'Py_UTF8Mode': 'runtime startup',
    'Py_UnbufferedStdioFlag': 'runtime startup',
    'Py_VerboseFlag': 'runtime startup',
    '_Py_path_config': 'runtime startup',
    '_PyOS_optarg': 'runtime startup',
    '_PyOS_opterr': 'runtime startup',
    '_PyOS_optind': 'runtime startup',
    '_Py_HashSecret': 'runtime startup',

    # REPL
    '_PyOS_ReadlineLock': 'repl',
    '_PyOS_ReadlineTState': 'repl',

    # effectively const
    'tracemalloc_empty_traceback': 'const',
    '_empty_bitmap_node': 'const',
    'posix_constants_pathconf': 'const',
    'posix_constants_confstr': 'const',
    'posix_constants_sysconf': 'const',
    '_PySys_ImplCacheTag': 'const',
    '_PySys_ImplName': 'const',
    'PyImport_Inittab': 'const',
    '_PyImport_DynLoadFiletab': 'const',
    '_PyParser_Grammar': 'const',
    'Py_hexdigits': 'const',
    '_PyImport_Inittab': 'const',
    '_PyByteArray_empty_string': 'const',
    '_PyLong_DigitValue': 'const',
    '_Py_SwappedOp': 'const',
    'PyStructSequence_UnnamedField': 'const',

    # signals are main-thread only
    'faulthandler_handlers': 'signals are main-thread only',
    'user_signals': 'signals are main-thread only',
    'wakeup': 'signals are main-thread only',

    # hacks
    '_PySet_Dummy': 'only used as a placeholder',
}
|
|
|
|
# Shared reason string returned by _is_ignored() for the per-file
# special cases whose data races are considered harmless.
BENIGN = 'races here are benign and unlikely'
|
|
|
|
|
|
def is_supported(variable, ignored=None, known=None, *,
                 _ignored=(lambda *a, **k: _is_ignored(*a, **k)),
                 _vartype_okay=(lambda *a, **k: _is_vartype_okay(*a, **k)),
                 ):
    """Return True if the given global variable is okay in CPython.

    "variable" must provide a "vartype" attribute (plus whatever the
    "_ignored" hook reads from it).

    "ignored" is an optional mapping; its "variables" entry is handed
    to the "_ignored" hook and its "types" entry to the "_vartype_okay"
    hook.  When "ignored" is None (or otherwise falsy) it is passed
    through to both hooks as-is instead of being indexed.

    "known" is accepted for interface compatibility; it is not used.

    The underscore-prefixed keyword arguments are injection points for
    the two checks; they default to late-bound wrappers around the
    module-level implementations.
    """
    if _ignored(variable,
                ignored and ignored.get('variables')):
        return True
    # Guard the lookup the same way as above: "ignored" defaults to
    # None, and None has no .get().
    if _vartype_okay(variable.vartype,
                     ignored and ignored.get('types')):
        return True
    return False
|
|
|
|
|
|
def _is_ignored(variable, ignoredvars=None, *,
                _IGNORED=IGNORED,
                ):
    """Return the reason the variable is ignored, or None if it is not.

    Lookup order:

    1. "ignoredvars" (if given and non-empty), keyed by "variable.id"
    2. the "_IGNORED" table, keyed by "variable.name" (only for
       variables whose "funcname" is None)
    3. hard-coded per-file special cases
    """
    if ignoredvars:
        reason = ignoredvars.get(variable.id)
        if reason:
            return reason

    if variable.funcname is None:
        reason = _IGNORED.get(variable.name)
        if reason:
            return reason

    filename = variable.filename

    # compiler
    vartype_prefixes = (
        ('Python/graminit.c', 'static state '),
        ('Python/symtable.c', 'static identifier '),
    )
    for cfile, prefix in vartype_prefixes:
        if filename == cfile and variable.vartype.startswith(prefix):
            return 'compiler'
    if filename == 'Python/Python-ast.c':
        # These should be const.
        if variable.name.endswith(('_field', '_attribute')):
            return 'compiler'

    # other
    special_cases = (
        # guarded by lock?
        ('Python/dtoa.c',
         ('p5s', 'freelist', 'private_mem', 'pmem_next'),
         'dtoa is thread-safe?'),
        # Threads do not become an issue until after these have been set
        # and these never get changed after that.
        ('Python/thread.c', ('initialized', 'thread_debug'), 'thread-safe'),
        # Races are benign here, as well as unlikely.
        ('Python/getversion.c', ('version',), BENIGN),
        ('Python/fileutils.c',
         ('force_ascii', 'ioctl_works', '_Py_open_cloexec_works'),
         BENIGN),
        ('Python/codecs.c', ('ucnhash_CAPI',), BENIGN),
        ('Python/bootstrap_hash.c', ('getrandom_works',), BENIGN),
        # bloom_linebreak is *mostly* benign
        ('Objects/unicodeobject.c',
         ('ucnhash_CAPI', 'bloom_linebreak'),
         BENIGN),
        # The static is used for pre-allocation.
        ('Modules/getbuildinfo.c', ('buildinfo',), BENIGN),
        ('Modules/posixmodule.c',
         ('ticks_per_second', 'dup3_works'),
         BENIGN),
        ('Modules/timemodule.c', ('ticks_per_second',), BENIGN),
        ('Objects/longobject.c',
         ('log_base_BASE', 'convwidth_base', 'convmultmax_base'),
         BENIGN),
    )
    for cfile, names, reason in special_cases:
        if filename == cfile and variable.name in names:
            return reason

    return None
|
|
|
|
|
|
def _is_vartype_okay(vartype, ignoredtypes=None):
    """Return the reason the given type string is okay, else None.

    Anything recognized by _is_object() is rejected (None).  Otherwise
    the type is classified by prefix / substring checks and the result
    is one of 'const', 'thread-safe', 'startup' or 'function pointer'.

    "ignoredtypes" is accepted but not consulted.
    """
    if _is_object(vartype):
        return None

    if vartype.startswith(('static const ', 'const ')):
        return 'const'

    const_markers = (
        # components for TypeObject definitions
        'PyMethodDef', 'PyGetSetDef', 'PyMemberDef',
        'PyNumberMethods', 'PySequenceMethods', 'PyMappingMethods',
        'PyBufferProcs', 'PyAsyncMethods',
        'slotdef', 'newfunc',
        # structseq
        'PyStructSequence_Desc', 'PyStructSequence_Field',
        # other definitions
        'PyModuleDef',
    )
    if any(marker in vartype for marker in const_markers):
        return 'const'

    # thread-safe
    if '_Py_atomic_int' in vartype or 'pthread_condattr_t' in vartype:
        return 'thread-safe'

    # startup
    if '_Py_PreInitEntry' in vartype:
        return 'startup'

    # XXX Not handled (yet?):
    #  * global: PyMemAllocatorEx
    #  * others: PyThread_type_lock
    #  * ???: _Py_tss_t, _Py_hashtable_t, stack_t, _PyUnicode_Name_CAPI

    # functions
    if '(' in vartype and '[' not in vartype:
        return 'function pointer'

    # XXX finish!
    # * allow const values?
    return None
|
|
|
|
|
|
PYOBJECT_RE = re.compile(r'''
|
|
^
|
|
(
|
|
# must start with "static "
|
|
static \s+
|
|
(
|
|
identifier
|
|
)
|
|
\b
|
|
) |
|
|
(
|
|
# may start with "static "
|
|
( static \s+ )?
|
|
(
|
|
.*
|
|
(
|
|
PyObject |
|
|
PyTypeObject |
|
|
_? Py \w+ Object |
|
|
_PyArg_Parser |
|
|
_Py_Identifier |
|
|
traceback_t |
|
|
PyAsyncGenASend |
|
|
_PyAsyncGenWrappedValue |
|
|
PyContext |
|
|
method_cache_entry
|
|
)
|
|
\b
|
|
) |
|
|
(
|
|
(
|
|
_Py_IDENTIFIER |
|
|
_Py_static_string
|
|
)
|
|
[(]
|
|
)
|
|
)
|
|
''', re.VERBOSE)
|
|
|
|
|
|
def _is_object(vartype):
    """Return True if the type string looks like a Python object type.

    "PyDictKeysObject" is explicitly excluded.  Otherwise the type
    qualifies if PYOBJECT_RE matches it or if it ends with one of the
    bool singleton struct names.
    """
    if 'PyDictKeysObject' in vartype:
        return False

    # XXX Add more?
    # (e.g. treat any whitespace-separated part equal to "PyObject",
    # or starting with "PyObject[", as an object; is const automatic?)
    bool_structs = (' _Py_FalseStruct', ' _Py_TrueStruct')
    return bool(PYOBJECT_RE.match(vartype)) or vartype.endswith(bool_structs)
|
|
|
|
|
|
def ignored_from_file(infile, *,
                      _read_tsv=read_tsv,
                      ):
    """Return the ignored entries found in the given TSV file.

    The result is a dict with a single "variables" key, which maps
    each row's ID(filename, funcname, name) to that row's reason.
    An empty or "-" funcname is stored as None.

    Raise ValueError for any row whose kind is not "variable".
    """
    variables = {}
    # Other categories ('types', 'constants', 'macros') are not
    # supported yet.
    for row in _read_tsv(infile, IGNORED_HEADER):
        filename, funcname, name, kind, reason = row
        if not funcname or funcname == '-':
            funcname = None
        varid = ID(filename, funcname, name)
        if kind != 'variable':
            raise ValueError(f'unsupported kind in row {row}')
        variables[varid] = reason
    return {'variables': variables}
|
|
|
|
|
|
##################################
|
|
# generate
|
|
|
|
def _get_row(varid, reason):
|
|
return (
|
|
varid.filename,
|
|
varid.funcname or '-',
|
|
varid.name,
|
|
'variable',
|
|
str(reason),
|
|
)
|
|
|
|
|
|
def _get_rows(variables, ignored=None, *,
              _as_row=_get_row,
              _is_ignored=_is_ignored,
              _vartype_okay=_is_vartype_okay,
              ):
    """Yield a row for each variable that has a reason to be ignored.

    The reason comes from the "_is_ignored" hook or, failing that,
    from the "_vartype_okay" hook; variables with neither are skipped.
    Each reported variable is also printed, and a total is printed
    once the input is exhausted.
    """
    ignoredvars = ignored and ignored.get('variables')
    ignoredtypes = ignored and ignored.get('types')
    total = 0
    for variable in variables:
        reason = (_is_ignored(variable, ignoredvars)
                  or _vartype_okay(variable.vartype, ignoredtypes))
        if reason:
            print(' ', variable, repr(reason))
            yield _as_row(variable.id, reason)
            total += 1
    print(f'total: {total}')
|
|
|
|
|
|
def _generate_ignored_file(variables, filename=None, *,
                           _generate_rows=_get_rows,
                           _write_tsv=write_tsv,
                           ):
    """Write the rows generated for the given variables as a TSV file.

    When no filename is given, the output goes to IGNORED_FILE with
    ".new" appended.
    """
    target = filename or IGNORED_FILE + '.new'
    _write_tsv(target, IGNORED_HEADER, _generate_rows(variables))
|
|
|
|
|
|
# Script entry point: regenerate the ignored-variables file (written to
# IGNORED_FILE + '.new' by default) from the globals found by
# find.globals_from_binary().
if __name__ == '__main__':
    from cpython import SOURCE_DIRS
    from cpython.known import (
        from_file as known_from_file,
        DATA_FILE as KNOWN_FILE,
    )
    # XXX This is wrong!
    # NOTE(review): a relative import presumably cannot resolve when this
    # file is executed directly as a script -- confirm and fix.
    from . import find
    known = known_from_file(KNOWN_FILE)
    # "known" may be empty/None; fall back to an empty dict for the lookup.
    knownvars = (known or {}).get('variables')
    variables = find.globals_from_binary(knownvars=knownvars,
                                         dirnames=SOURCE_DIRS)

    _generate_ignored_file(variables)
|