You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
204 lines
6.2 KiB
204 lines
6.2 KiB
#------------------------------------------------------------------------------
# pycparser: c_json.py
#
# by Michael White (@mypalmike)
#
# This example includes functions to serialize and deserialize an ast
# to and from json format. Serializing involves walking the ast and converting
# each node from a python Node object into a python dict. Deserializing
# involves the opposite conversion, walking the tree formed by the
# dict and converting each dict into the specific Node object it represents.
# The dict itself is serialized and deserialized using the python json module.
#
# The dict representation is a fairly direct transformation of the object
# attributes. Each node in the dict gets one metadata field referring to the
# specific node class name, _nodetype. Each local attribute (i.e. not linking
# to child nodes) has a string value or array of string values. Each child
# attribute is either another dict or an array of dicts, exactly as in the
# Node object representation. The "coord" attribute, representing the
# node's location within the source code, is serialized/deserialized from
# a Coord object into a string of the format "filename:line[:column]".
#
# Example TypeDecl node, with IdentifierType child node, represented as a dict:
#     "type": {
#         "_nodetype": "TypeDecl",
#         "coord": "c_files/funky.c:8",
#         "declname": "o",
#         "quals": [],
#         "type": {
#             "_nodetype": "IdentifierType",
#             "coord": "c_files/funky.c:8",
#             "names": [
#                 "char"
#             ]
#         }
#     }
#------------------------------------------------------------------------------
|
|
from __future__ import print_function
|
|
|
|
import json
|
|
import sys
|
|
import re
|
|
|
|
# Not required if pycparser has been installed into site-packages/ with
# setup.py. Otherwise, also search the current and the parent directory
# when resolving the pycparser import.
sys.path += ['.', '..']
|
|
|
|
from pycparser import parse_file, c_ast
|
|
from pycparser.plyparser import Coord
|
|
|
|
|
|
# Matches an indexed child name such as 'block_items[1]'; group 1 is the
# attribute name and group 2 is the text between the brackets.
RE_CHILD_ARRAY = re.compile(r'(.*)\[(.*)\]')
# Matches names that start with a double-underscore ... double-underscore
# sequence, e.g. '__weakref__'.
RE_INTERNAL_ATTR = re.compile(r'__.*__')
|
|
|
|
|
|
class CJsonError(Exception):
    """ Error raised while converting an ast to its dict representation. """
|
|
|
|
|
|
def memodict(fn):
    """ Fast memoization decorator for a function taking a single argument.

    The returned callable is the bound ``__getitem__`` of a dict subclass:
    a cache hit is a plain dict lookup, while a miss goes through
    ``__missing__``, which calls ``fn`` once and stores the result under
    that argument. The argument is used as a dict key, so it must be hashable.
    """
    class _Cache(dict):
        def __missing__(self, arg):
            # Compute once, remember, and hand the value back to the lookup.
            value = fn(arg)
            self[arg] = value
            return value

    cache = _Cache()
    return cache.__getitem__
|
|
|
|
|
|
@memodict
def child_attrs_of(klass):
    """
    Given a Node class, get a set of child attrs.

    The result holds every name in ``klass.__slots__`` that is neither
    matched by RE_INTERNAL_ATTR nor listed in ``klass.attr_names``.
    Memoized to avoid highly repetitive string manipulation.
    """
    slot_names = {
        name for name in klass.__slots__
        if RE_INTERNAL_ATTR.match(name) is None
    }
    return slot_names.difference(klass.attr_names)
|
|
|
|
|
|
def to_dict(node):
    """ Recursively convert an ast into dict representation.

    The resulting dict holds the node's class name under '_nodetype', one
    entry per name in the class's attr_names, the stringified coord (or None)
    under 'coord', and one entry per child: a nested dict for a simple child
    or a list of nested dicts for an indexed child. Child attributes that the
    node did not report are filled in with None.

    Raises CJsonError if the children of an indexed attribute do not arrive
    in consecutive order starting from 0.
    """
    node_class = node.__class__

    # Metadata
    out = {'_nodetype': node_class.__name__}

    # Local node attributes
    for name in node_class.attr_names:
        out[name] = getattr(node, name)

    # Coord object
    out['coord'] = str(node.coord) if node.coord else None

    # Child attributes
    for child_name, child in node.children():
        # Child strings are either simple (e.g. 'value') or arrays
        # (e.g. 'block_items[1]')
        indexed = RE_CHILD_ARRAY.match(child_name)
        if indexed is None:
            out[child_name] = to_dict(child)
            continue

        array_name, array_index = indexed.groups()
        array_index = int(array_index)
        # arrays come in order, so we verify and append.
        siblings = out.setdefault(array_name, [])
        if array_index != len(siblings):
            raise CJsonError('Internal ast error. Array {} out of order. '
                             'Expected index {}, got {}'.format(
                                 array_name, len(siblings), array_index))
        siblings.append(to_dict(child))

    # Any child attributes that were missing need "None" values in the json.
    for child_attr in child_attrs_of(node_class):
        out.setdefault(child_attr, None)

    return out
|
|
|
|
|
|
def to_json(node, **kwargs):
    """ Convert ast node to json string.

    Keyword arguments are forwarded unchanged to json.dumps.
    """
    as_dict = to_dict(node)
    return json.dumps(as_dict, **kwargs)
|
|
|
|
|
|
def file_to_dict(filename):
    """ Load C file into dict representation of ast.

    The file is parsed with parse_file(filename, use_cpp=True) and the
    resulting ast is handed to to_dict.
    """
    return to_dict(parse_file(filename, use_cpp=True))
|
|
|
|
|
|
def file_to_json(filename, **kwargs):
    """ Load C file into json string representation of ast.

    The file is parsed with parse_file(filename, use_cpp=True); keyword
    arguments are passed on to to_json.
    """
    return to_json(parse_file(filename, use_cpp=True), **kwargs)
|
|
|
|
|
|
def _parse_coord(coord_str):
|
|
""" Parse coord string (file:line[:column]) into Coord object. """
|
|
if coord_str is None:
|
|
return None
|
|
|
|
vals = coord_str.split(':')
|
|
vals.extend([None] * 3)
|
|
filename, line, column = vals[:3]
|
|
return Coord(filename, line, column)
|
|
|
|
|
|
def _convert_to_obj(value):
|
|
"""
|
|
Convert an object in the dict representation into an object.
|
|
Note: Mutually recursive with from_dict.
|
|
|
|
"""
|
|
value_type = type(value)
|
|
if value_type == dict:
|
|
return from_dict(value)
|
|
elif value_type == list:
|
|
return [_convert_to_obj(item) for item in value]
|
|
else:
|
|
# String
|
|
return value
|
|
|
|
|
|
def from_dict(node_dict):
    """ Recursively build an ast from dict representation.

    The '_nodetype' entry names the c_ast class to instantiate; every other
    entry becomes a keyword argument of that class's constructor. 'coord'
    values go through _parse_coord and all other values through
    _convert_to_obj.

    node_dict is only read, never modified, so the same dict can be
    converted again or serialized afterwards.

    Raises KeyError if node_dict has no '_nodetype' entry and AttributeError
    if c_ast has no attribute of that name.
    """
    class_name = node_dict['_nodetype']

    klass = getattr(c_ast, class_name)

    # Create a new dict containing the key-value pairs which we can pass
    # to node constructors.
    objs = {}
    for key, value in node_dict.items():
        if key == '_nodetype':
            # Metadata only; not a constructor argument.
            continue
        if key == 'coord':
            objs[key] = _parse_coord(value)
        else:
            objs[key] = _convert_to_obj(value)

    # Use keyword parameters, which works thanks to beautifully consistent
    # ast Node initializers.
    return klass(**objs)
|
|
|
|
|
|
def from_json(ast_json):
    """ Build an ast from json string representation.

    The string is decoded with json.loads and the result is handed to
    from_dict.
    """
    node_dict = json.loads(ast_json)
    return from_dict(node_dict)
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
|
if __name__ == "__main__":
    if len(sys.argv) <= 1:
        print("Please provide a filename as argument")
    else:
        # Some test code...
        # Do trip from C -> ast -> dict -> ast -> json, then print.
        round_tripped = from_dict(file_to_dict(sys.argv[1]))
        print(to_json(round_tripped, sort_keys=True, indent=4))
|