You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
330 lines
12 KiB
330 lines
12 KiB
# The code here consists of hacks for pre-populating the known.tsv file.
|
|
|
|
from c_analyzer.parser.preprocessor import _iter_clean_lines
|
|
from c_analyzer.parser.naive import (
|
|
iter_variables, parse_variable_declaration, find_variables,
|
|
)
|
|
from c_analyzer.common.known import HEADER as KNOWN_HEADER
|
|
from c_analyzer.common.info import UNKNOWN, ID
|
|
from c_analyzer.variables import Variable
|
|
from c_analyzer.util import write_tsv
|
|
|
|
from . import SOURCE_DIRS, REPO_ROOT
|
|
from .known import DATA_FILE as KNOWN_FILE
|
|
from .files import iter_cpython_files
|
|
|
|
|
|
POTS = ('char ', 'wchar_t ', 'int ', 'Py_ssize_t ')
|
|
POTS += tuple('const ' + v for v in POTS)
|
|
STRUCTS = ('PyTypeObject', 'PyObject', 'PyMethodDef', 'PyModuleDef', 'grammar')
|
|
|
|
|
|
def _parse_global(line, funcname=None):
|
|
line = line.strip()
|
|
if line.startswith('static '):
|
|
if '(' in line and '[' not in line and ' = ' not in line:
|
|
return None, None
|
|
name, decl = parse_variable_declaration(line)
|
|
elif line.startswith(('Py_LOCAL(', 'Py_LOCAL_INLINE(')):
|
|
name, decl = parse_variable_declaration(line)
|
|
elif line.startswith('_Py_static_string('):
|
|
decl = line.strip(';').strip()
|
|
name = line.split('(')[1].split(',')[0].strip()
|
|
elif line.startswith('_Py_IDENTIFIER('):
|
|
decl = line.strip(';').strip()
|
|
name = 'PyId_' + line.split('(')[1].split(')')[0].strip()
|
|
elif funcname:
|
|
return None, None
|
|
|
|
# global-only
|
|
elif line.startswith('PyAPI_DATA('): # only in .h files
|
|
name, decl = parse_variable_declaration(line)
|
|
elif line.startswith('extern '): # only in .h files
|
|
name, decl = parse_variable_declaration(line)
|
|
elif line.startswith('PyDoc_VAR('):
|
|
decl = line.strip(';').strip()
|
|
name = line.split('(')[1].split(')')[0].strip()
|
|
elif line.startswith(POTS): # implied static
|
|
if '(' in line and '[' not in line and ' = ' not in line:
|
|
return None, None
|
|
name, decl = parse_variable_declaration(line)
|
|
elif line.startswith(STRUCTS) and line.endswith(' = {'): # implied static
|
|
name, decl = parse_variable_declaration(line)
|
|
elif line.startswith(STRUCTS) and line.endswith(' = NULL;'): # implied static
|
|
name, decl = parse_variable_declaration(line)
|
|
elif line.startswith('struct '):
|
|
if not line.endswith(' = {'):
|
|
return None, None
|
|
if not line.partition(' ')[2].startswith(STRUCTS):
|
|
return None, None
|
|
# implied static
|
|
name, decl = parse_variable_declaration(line)
|
|
|
|
# file-specific
|
|
elif line.startswith(('SLOT1BINFULL(', 'SLOT1BIN(')):
|
|
# Objects/typeobject.c
|
|
funcname = line.split('(')[1].split(',')[0]
|
|
return [
|
|
('op_id', funcname, '_Py_static_string(op_id, OPSTR)'),
|
|
('rop_id', funcname, '_Py_static_string(op_id, OPSTR)'),
|
|
]
|
|
elif line.startswith('WRAP_METHOD('):
|
|
# Objects/weakrefobject.c
|
|
funcname, name = (v.strip() for v in line.split('(')[1].split(')')[0].split(','))
|
|
return [
|
|
('PyId_' + name, funcname, f'_Py_IDENTIFIER({name})'),
|
|
]
|
|
|
|
else:
|
|
return None, None
|
|
return name, decl
|
|
|
|
|
|
def _pop_cached(varcache, filename, funcname, name, *,
                _iter_variables=iter_variables,
                ):
    """Remove and return the cached variable for the given file/func/name.

    "varcache" maps each filename to a dict of that file's variables,
    keyed by variable ID.  A file's entry is built the first time the
    file is requested, by running _iter_variables() over the file with
    _parse_global() as the line parser.

    If "funcname" is UNKNOWN then the first cached variable whose ID has
    a matching name is used.  Otherwise the key is exactly
    (filename, funcname, name).  Returns None if nothing matches.
    """
    # Look for the file, loading its variables on the first request.
    try:
        per_file = varcache[filename]
    except KeyError:
        per_file = varcache[filename] = {}
        for parsed in _iter_variables(filename, parse_variable=_parse_global):
            parsed._isglobal = True
            per_file[parsed.id] = parsed
        # Show what was just loaded for this file.
        for key in per_file:
            print(' ', key)

    # Look for the variable.
    if funcname != UNKNOWN:
        return per_file.pop((filename, funcname, name), None)
    # The function is not known, so match on the name alone.
    match = next((key for key in per_file if key.name == name), None)
    if match is None:
        return None
    return per_file.pop(match)
|
|
|
|
|
|
def find_matching_variable(varid, varcache, allfilenames, *,
                           _pop_cached=_pop_cached,
                           ):
    """Return the cached variable matching "varid", or None.

    If "varid" has a known filename then only that file is searched;
    otherwise every file in "allfilenames" is.  When the filename is
    known, nothing matched, and "varid.funcname" is None, the ".h" files
    in "allfilenames" are searched by name as a fallback.

    A matched variable is removed from "varcache" (see _pop_cached()).
    """
    has_file = bool(varid.filename) and varid.filename != UNKNOWN
    candidates = [varid.filename] if has_file else allfilenames
    for candidate in candidates:
        found = _pop_cached(varcache, candidate, varid.funcname, varid.name)
        if found is not None:
            return found

    # Fall back to the header files, but only for a global in a known file.
    if not has_file or varid.funcname is not None:
        return None
    for header in allfilenames:
        if header.endswith('.h'):
            found = _pop_cached(varcache, header, None, varid.name)
            if found is not None:
                return found
    return None
|
|
|
|
|
|
# Hard-coded declarations, keyed by variable name, consulted by _known().
# _known() appends the variable's name to any value that does not already
# contain it, so most values here are just the text preceding the name.
# The grouping into MULTILINE/INLINE/FUNC/IMPLIED presumably reflects why
# each declaration could not be parsed from a single line -- TODO confirm.

MULTILINE = {
    # Python/Python-ast.c
    # NOTE(review): these eight have no 'static ' prefix, unlike the other
    # singletons below -- confirm whether that is intentional.
    **dict.fromkeys((
        'Load_singleton',
        'Store_singleton',
        'Del_singleton',
        'AugLoad_singleton',
        'AugStore_singleton',
        'Param_singleton',
        'And_singleton',
        'Or_singleton',
    ), 'PyObject *'),
    **dict.fromkeys((
        'Add_singleton',
        'Sub_singleton',
        'Mult_singleton',
        'MatMult_singleton',
        'Div_singleton',
        'Mod_singleton',
        'Pow_singleton',
        'LShift_singleton',
        'RShift_singleton',
        'BitOr_singleton',
        'BitXor_singleton',
        'BitAnd_singleton',
        'FloorDiv_singleton',
        'Invert_singleton',
        'Not_singleton',
        'UAdd_singleton',
        'USub_singleton',
        'Eq_singleton',
        'NotEq_singleton',
        'Lt_singleton',
        'LtE_singleton',
        'Gt_singleton',
        'GtE_singleton',
        'Is_singleton',
        'IsNot_singleton',
        'In_singleton',
        'NotIn_singleton',
    ), 'static PyObject *'),
    # Python/symtable.c
    **dict.fromkeys((
        'top',
        'lambda',
        'genexpr',
        'listcomp',
        'setcomp',
        'dictcomp',
        '__class__',
    ), 'static identifier '),
    # Python/compile.c
    **dict.fromkeys((
        '__doc__',
        '__annotations__',
    ), 'static PyObject *'),
    # Objects/floatobject.c
    **dict.fromkeys((
        'double_format',
        'float_format',
        'detected_double_format',
        'detected_float_format',
    ), 'static float_format_type '),
    # Parser/listnode.c
    **dict.fromkeys((
        'level',
        'atbol',
    ), 'static int '),
    # Python/dtoa.c
    'private_mem': 'static double private_mem[PRIVATE_mem]',
    'pmem_next': 'static double *',
    # Modules/_weakref.c
    'weakref_functions': 'static PyMethodDef ',
}
INLINE = {
    # Modules/_tracemalloc.c
    'allocators': (
        'static struct { PyMemAllocatorEx mem; PyMemAllocatorEx raw; '
        'PyMemAllocatorEx obj; } '
    ),
    # Modules/faulthandler.c
    'fatal_error': (
        'static struct { int enabled; PyObject *file; int fd; '
        'int all_threads; PyInterpreterState *interp; '
        'void *exc_handler; } '
    ),
    'thread': (
        'static struct { PyObject *file; int fd; '
        'PY_TIMEOUT_T timeout_us; int repeat; '
        'PyInterpreterState *interp; int exit; char *header; '
        'size_t header_len; PyThread_type_lock cancel_event; '
        'PyThread_type_lock running; } '
    ),
    # Modules/signalmodule.c
    'Handlers': (
        'static volatile struct { _Py_atomic_int tripped; '
        'PyObject *func; } Handlers[NSIG]'
    ),
    'wakeup': (
        'static volatile struct { SOCKET_T fd; '
        'int warn_on_full_buffer; int use_send; } '
    ),
    # Python/dynload_shlib.c
    'handles': (
        'static struct { dev_t dev; ino_t ino; void *handle; } '
        'handles[128]'
    ),
    # Objects/obmalloc.c
    '_PyMem_Debug': (
        'static struct { debug_alloc_api_t raw; debug_alloc_api_t mem; '
        'debug_alloc_api_t obj; } '
    ),
    # Python/bootstrap_hash.c
    'urandom_cache': (
        'static struct { int fd; dev_t st_dev; ino_t st_ino; } '
    ),
}
FUNC = {
    # Objects/object.c
    '_Py_abstract_hack':
        'Py_ssize_t (*_Py_abstract_hack)(PyObject *)',
    # Parser/myreadline.c
    'PyOS_InputHook':
        'int (*PyOS_InputHook)(void)',
    # Python/pylifecycle.c
    '_PyOS_mystrnicmp_hack':
        'int (*_PyOS_mystrnicmp_hack)'
        '(const char *, const char *, Py_ssize_t)',
    # Parser/myreadline.c
    'PyOS_ReadlineFunctionPointer':
        'char *(*PyOS_ReadlineFunctionPointer)'
        '(FILE *, FILE *, const char *)',
}
IMPLIED = {
    # Objects/boolobject.c
    '_Py_FalseStruct': 'static struct _longobject ',
    '_Py_TrueStruct': 'static struct _longobject ',
    # Modules/config.c
    '_PyImport_Inittab': 'struct _inittab _PyImport_Inittab[]',
}
# Everything above merged into one lookup table; on a duplicate key the
# later table would win.
GLOBALS = {**MULTILINE, **INLINE, **FUNC, **IMPLIED}

# Function-local statics, keyed by variable name.
# Each value is (filename, funcname, declaration).
LOCALS = {
    'buildinfo': (
        'Modules/getbuildinfo.c',
        'Py_GetBuildInfo',
        ('static char buildinfo[50 + sizeof(GITVERSION) + '
         '((sizeof(GITTAG) > sizeof(GITBRANCH)) '
         '? sizeof(GITTAG) : sizeof(GITBRANCH))]'),
    ),
    'methods': (
        'Python/codecs.c',
        '_PyCodecRegistry_Init',
        'static struct { char *name; PyMethodDef def; } methods[]',
    ),
}
|
|
|
|
|
|
def _known(symbol):
    """Return a Variable for "symbol" built from the hard-coded tables.

    A symbol with no function must have a known filename; its
    declaration comes from GLOBALS, or from a name-based rule for
    "*_methods" names and for "PyExc_"/"_PyExc_" names in
    Objects/exceptions.c.  A symbol with a function is looked up in
    LOCALS, and only when both its funcname and filename are UNKNOWN.

    Raises KeyError (with the symbol name) if there is no match.
    """
    name = symbol.name
    if not symbol.funcname:
        if not symbol.filename or symbol.filename == UNKNOWN:
            raise KeyError(name)
        varid = symbol.id
        try:
            decl = GLOBALS[name]
        except KeyError:
            # Not in the table, so fall back to the name-based rules.
            if name.endswith('_methods'):
                decl = 'static PyMethodDef '
            elif (symbol.filename == 'Objects/exceptions.c'
                  and name.startswith(('PyExc_', '_PyExc_'))):
                decl = 'static PyTypeObject '
            else:
                raise
    else:
        if not (symbol.funcname == UNKNOWN and symbol.filename == UNKNOWN):
            raise KeyError(name)
        filename, funcname, decl = LOCALS[name]
        varid = ID(filename, funcname, name)

    # Table entries that are only a type prefix still need the name.
    if name not in decl:
        decl += name
    return Variable(varid, 'static', decl)
|
|
|
|
|
|
def known_row(varid, decl):
    """Return the 5-tuple row for one variable.

    The row is (filename, funcname, name, 'variable', decl).  A falsy
    "varid.funcname" is written as '-'.
    """
    filename = varid.filename
    funcname = varid.funcname if varid.funcname else '-'
    return (filename, funcname, varid.name, 'variable', decl)
|
|
|
|
|
|
def known_rows(symbols, *,
               cached=True,
               _get_filenames=iter_cpython_files,
               _find_match=find_matching_variable,
               _find_symbols=find_variables,
               _as_known=known_row,
               ):
    """Yield one row (as built by "_as_known") for each of the symbols.

    Each symbol is resolved first through _known() and then, if that
    raises KeyError, through "_find_match" over the files reported by
    "_get_filenames".  A symbol that still has no match is emitted with
    UNKNOWN in place of both its storage and its declaration.

    Only cached=True is implemented; with cached=False the generator
    raises NotImplementedError when first advanced.
    """
    filenames = list(_get_filenames())
    cache = {}
    if not cached:
        raise NotImplementedError  # XXX incorporate KNOWN
        # NOTE: everything below in this branch is unreachable until the
        # "raise" above is removed.
        for variable in _find_symbols(symbols, filenames,
                                      srccache=cache,
                                      parse_variable=_parse_global,
                                      ):
            #variable = variable._replace(
            #    filename=os.path.relpath(variable.filename, REPO_ROOT))
            if variable.funcname == UNKNOWN:
                print(variable)
            if variable.vartype== UNKNOWN:
                print(variable)
            yield _as_known(variable.id, variable.vartype)

    for symbol in symbols:
        try:
            resolved = _known(symbol)
        except KeyError:
            resolved = _find_match(symbol, cache, filenames)
            if resolved is None:
                resolved = Variable(symbol.id, UNKNOWN, UNKNOWN)
        yield _as_known(resolved.id, resolved.vartype)
|
|
|
|
|
|
def generate(symbols, filename=None, *,
             _generate_rows=known_rows,
             _write_tsv=write_tsv,
             ):
    """Write the rows generated for "symbols" to a TSV file.

    If "filename" is falsy then the output goes to KNOWN_FILE + '.new'.
    The rows come from "_generate_rows" and are written, together with
    KNOWN_HEADER, by "_write_tsv".
    """
    target = filename or KNOWN_FILE + '.new'
    _write_tsv(target, KNOWN_HEADER, _generate_rows(symbols))
|
|
|
|
|
|
if __name__ == '__main__':
    # Imported here so that importing this module does not require c_symbols.
    from c_symbols import binary

    # Generate the rows from the symbols of the binary.PYTHON executable.
    symbols = binary.iter_symbols(binary.PYTHON, find_local_symbol=None)
    generate(symbols)
|