#!/usr/bin/env python3
#
# Copyright (C) 2018 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""This module implements a Android.bp parser."""
import collections
import glob
import itertools
import os
import re
import sys
#------------------------------------------------------------------------------
# Python 2 compatibility
#------------------------------------------------------------------------------
if sys.version_info >= (3,):
py3_chr = chr # pylint: disable=invalid-name
else:
def py3_chr(codepoint):
"""Convert an integer character codepoint into a utf-8 string."""
return unichr(codepoint).encode('utf-8')
try:
from enum import Enum
except ImportError:
class _Enum(object): # pylint: disable=too-few-public-methods
"""A name-value pair for each enumeration."""
        __slots__ = ('name', 'value')
def __init__(self, name, value):
"""Create a name-value pair."""
self.name = name
self.value = value
def __repr__(self):
"""Return the name of the enumeration."""
return self.name
class _EnumMeta(type): # pylint: disable=too-few-public-methods
"""Metaclass for Enum base class."""
def __new__(mcs, name, bases, attrs):
"""Collects enumerations from attributes of the derived classes."""
enums = []
new_attrs = {'_enums': enums}
for key, value in attrs.iteritems():
if key.startswith('_'):
new_attrs[key] = value
else:
item = _Enum(key, value)
enums.append(item)
new_attrs[key] = item
return type.__new__(mcs, name, bases, new_attrs)
def __iter__(cls):
"""Iterate the list of enumerations."""
return iter(cls._enums)
class Enum(object): # pylint: disable=too-few-public-methods
"""Enum base class."""
__metaclass__ = _EnumMeta
#------------------------------------------------------------------------------
# Lexer
#------------------------------------------------------------------------------
class Token(Enum): # pylint: disable=too-few-public-methods
"""Token enumerations."""
EOF = 0
IDENT = 1
LPAREN = 2
RPAREN = 3
LBRACKET = 4
RBRACKET = 5
LBRACE = 6
RBRACE = 7
COLON = 8
ASSIGN = 9
ASSIGNPLUS = 10
PLUS = 11
COMMA = 12
STRING = 13
INTEGER = 14
COMMENT = 15
SPACE = 16
class LexerError(ValueError):
"""Lexer error exception class."""
def __init__(self, buf, pos, message):
"""Create a lexer error exception object."""
super(LexerError, self).__init__(message)
self.message = message
self.line, self.column = Lexer.compute_line_column(buf, pos)
def __str__(self):
"""Convert lexer error to string representation."""
return 'LexerError: {}:{}: {}'.format(
self.line, self.column, self.message)
class Lexer(object):
"""Lexer to tokenize the input string."""
def __init__(self, buf, offset=0, path=None):
"""Tokenize the source code in buf starting from offset.
Args:
buf (string) The source code to be tokenized.
offset (int) The position to start.
"""
self.buf = buf
self.start = None
self.end = offset
self.token = None
self.literal = None
self.path = path
self._next()
def consume(self, *tokens):
"""Consume one or more token."""
for token in tokens:
if token == self.token:
self._next()
else:
raise LexerError(self.buf, self.start,
'unexpected token ' + self.token.name)
def _next(self):
"""Read next non-comment non-space token."""
buf_len = len(self.buf)
while self.end < buf_len:
self.start = self.end
self.token, self.end, self.literal = self.lex(self.buf, self.start)
if self.token != Token.SPACE and self.token != Token.COMMENT:
return
self.start = self.end
self.token = Token.EOF
self.literal = None
@staticmethod
def compute_line_column(buf, pos):
"""Compute the line number and the column number of a given position in
the buffer."""
prior = buf[0:pos]
newline_pos = prior.rfind('\n')
if newline_pos == -1:
return (1, pos + 1)
return (prior.count('\n') + 1, pos - newline_pos)
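    # A worked example (sketch): in the buffer 'ab\ncd', position 4 is the
    # character 'd', so compute_line_column('ab\ncd', 4) returns (2, 2).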
UNICODE_CHARS_PATTERN = re.compile('[^\\\\\\n"]+')
ESCAPE_CHAR_TABLE = {
'a': '\a', 'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t',
'v': '\v', '\\': '\\', '\'': '\'', '\"': '\"',
}
OCT_TABLE = {str(i) for i in range(8)}
@staticmethod
def decode_oct(buf, offset, start, end):
"""Read characters from buf[start:end] and interpret them as an octal
integer."""
if end > len(buf):
raise LexerError(buf, offset, 'bad octal escape sequence')
try:
codepoint = int(buf[start:end], 8)
except ValueError:
raise LexerError(buf, offset, 'bad octal escape sequence')
if codepoint > 0xff:
raise LexerError(buf, offset, 'bad octal escape sequence')
return codepoint
@staticmethod
def decode_hex(buf, offset, start, end):
"""Read characters from buf[start:end] and interpret them as a
hexadecimal integer."""
if end > len(buf):
raise LexerError(buf, offset, 'bad hex escape sequence')
try:
return int(buf[start:end], 16)
except ValueError:
raise LexerError(buf, offset, 'bad hex escape sequence')
@classmethod
def lex_interpreted_string(cls, buf, offset):
"""Tokenize a golang interpreted string.
Args:
buf (str) The source code buffer.
offset (int) The position to find a golang interpreted string
literal.
Returns:
            A tuple of the end position just past the closing quote and the
            decoded string literal.
"""
buf_len = len(buf)
pos = offset + 1
literal = ''
while pos < buf_len:
# Match unicode characters
match = cls.UNICODE_CHARS_PATTERN.match(buf, pos)
if match:
literal += match.group(0)
pos = match.end()
# Read the next character
try:
char = buf[pos]
except IndexError:
raise LexerError(buf, pos,
'unclosed interpreted string literal')
if char == '\\':
# Escape sequences
try:
char = buf[pos + 1]
except IndexError:
raise LexerError(buf, pos, 'bad escape sequence')
if char in cls.OCT_TABLE:
literal += chr(cls.decode_oct(buf, pos, pos + 1, pos + 4))
pos += 4
elif char == 'x':
literal += chr(cls.decode_hex(buf, pos, pos + 2, pos + 4))
pos += 4
elif char == 'u':
literal += py3_chr(
cls.decode_hex(buf, pos, pos + 2, pos + 6))
pos += 6
elif char == 'U':
literal += py3_chr(
cls.decode_hex(buf, pos, pos + 2, pos + 10))
pos += 10
else:
try:
literal += cls.ESCAPE_CHAR_TABLE[char]
pos += 2
except KeyError:
raise LexerError(buf, pos, 'bad escape sequence')
continue
if char == '"':
# End of string literal
return (pos + 1, literal)
raise LexerError(buf, pos, 'unclosed interpreted string literal')
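    # For example (a sketch): given the five-character source '"a\n"' (with a
    # literal backslash), lex_interpreted_string(r'"a\n"', 0) returns
    # (5, 'a\n'), decoding the escape sequence into a real newline.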
@classmethod
def lex_string(cls, buf, offset):
"""Tokenize a golang string literal.
Args:
buf (str) The source code buffer.
offset (int) The position to find a golang string literal.
Returns:
            A tuple of the end position just past the closing quote and the
            string literal (with escape sequences decoded for interpreted
            strings).
"""
char = buf[offset]
if char == '`':
try:
end = buf.index('`', offset + 1)
return (end + 1, buf[offset + 1 : end])
except ValueError:
raise LexerError(buf, len(buf), 'unclosed raw string literal')
if char == '"':
return cls.lex_interpreted_string(buf, offset)
raise LexerError(buf, offset, 'no string literal start character')
LEXER_PATTERNS = (
(Token.IDENT, '[A-Za-z_][0-9A-Za-z_]*'),
(Token.LPAREN, '\\('),
(Token.RPAREN, '\\)'),
(Token.LBRACKET, '\\['),
(Token.RBRACKET, '\\]'),
(Token.LBRACE, '\\{'),
(Token.RBRACE, '\\}'),
(Token.COLON, ':'),
(Token.ASSIGN, '='),
(Token.ASSIGNPLUS, '\\+='),
(Token.PLUS, '\\+'),
(Token.COMMA, ','),
(Token.STRING, '["`]'),
        (Token.INTEGER, '-?[0-9]+'),
(Token.COMMENT,
'/(?:(?:/[^\\n]*)|(?:\\*(?:(?:[^*]*)|(?:\\*+[^/*]))*\\*+/))'),
(Token.SPACE, '\\s+'),
)
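    # Note: the alternation built below tries the patterns in order, so
    # ASSIGNPLUS ('+=') must precede PLUS ('+'); otherwise '+=' would be
    # tokenized as PLUS followed by ASSIGN.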
LEXER_MATCHER = re.compile('|'.join(
'(' + pattern + ')' for _, pattern in LEXER_PATTERNS))
@classmethod
def lex(cls, buf, offset):
"""Tokenize a token from buf[offset].
Args:
buf (string) The source code buffer.
offset (int) The position to find and tokenize a token.
        Returns:
A tuple with three elements. The first element is the token id.
The second element is the end of the token. The third element is
the value for strings or identifiers.
"""
match = cls.LEXER_MATCHER.match(buf, offset)
if not match:
raise LexerError(buf, offset, 'unknown token')
token = cls.LEXER_PATTERNS[match.lastindex - 1][0]
if token == Token.STRING:
end, literal = cls.lex_string(buf, offset)
else:
end = match.end()
if token in {Token.IDENT, Token.INTEGER}:
literal = buf[offset:end]
else:
literal = None
return (token, end, literal)
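# A minimal usage sketch of the lexer:
#
#   lexer = Lexer('name = "foo"')
#   assert lexer.token == Token.IDENT and lexer.literal == 'name'
#   lexer.consume(Token.IDENT)
#   assert lexer.token == Token.ASSIGN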
#------------------------------------------------------------------------------
# AST
#------------------------------------------------------------------------------
class Expr(object): # pylint: disable=too-few-public-methods
"""Base class for all expressions."""
def eval(self, env):
"""Evaluate the expression under an environment."""
raise NotImplementedError()
class String(Expr, str):
"""String constant literal."""
def eval(self, env):
"""Evaluate the string expression under an environment."""
return self
class Bool(Expr): # pylint: disable=too-few-public-methods
"""Boolean constant literal."""
__slots__ = ('value',)
def __init__(self, value):
"""Create a boolean constant literal."""
self.value = value
def __repr__(self):
"""Convert a boolean constant literal to string representation."""
return repr(self.value)
def __bool__(self):
"""Convert boolean constant literal to Python bool type."""
return self.value
__nonzero__ = __bool__
def __eq__(self, rhs):
"""Compare whether two instances are equal."""
return self.value == rhs.value
def __hash__(self):
"""Compute the hashed value."""
return hash(self.value)
def eval(self, env):
"""Evaluate the boolean expression under an environment."""
return self
class Integer(Expr): # pylint: disable=too-few-public-methods
"""Integer constant literal."""
__slots__ = ('value',)
def __init__(self, value):
"""Create an integer constant literal."""
self.value = value
def __repr__(self):
"""Convert an integer constant literal to string representation."""
return repr(self.value)
def __bool__(self):
"""Convert an integer constant literal to Python bool type."""
return bool(self.value)
__nonzero__ = __bool__
def __int__(self):
"""Convert an integer constant literal to Python int type."""
return self.value
def __eq__(self, rhs):
"""Compare whether two instances are equal."""
return self.value == rhs.value
def __hash__(self):
"""Compute the hashed value."""
return hash(self.value)
def eval(self, env):
"""Evaluate the integer expression under an environment."""
return self
class VarRef(Expr): # pylint: disable=too-few-public-methods
"""A reference to a variable."""
def __init__(self, name, value):
"""Create a variable reference with a name and the value under static
scoping."""
self.name = name
self.value = value
def __repr__(self):
"""Convert a variable reference to string representation."""
return self.name
def eval(self, env):
"""Evaluate the identifier under an environment."""
if self.value is None:
return env[self.name].eval(env)
return self.value.eval(env)
class List(Expr, list):
"""List expression."""
def eval(self, env):
"""Evaluate list elements under an environment."""
return List(item.eval(env) for item in self)
class Dict(Expr, collections.OrderedDict):
"""Dictionary expression."""
def __repr__(self):
attrs = ', '.join(key + ': ' + repr(value)
for key, value in self.items())
return '{' + attrs + '}'
def eval(self, env):
"""Evaluate dictionary values under an environment."""
return Dict((key, value.eval(env)) for key, value in self.items())
class Concat(Expr): # pylint: disable=too-few-public-methods
"""List/string/integer plus operator."""
__slots__ = ('lhs', 'rhs')
def __init__(self, lhs, rhs):
"""Create a list/string/integer plus expression."""
self.lhs = lhs
self.rhs = rhs
def __repr__(self):
return '(' + repr(self.lhs) + ' + ' + repr(self.rhs) + ')'
def eval(self, env):
"""Evaluate list/string/integer plus operator under an environment."""
lhs = self.lhs.eval(env)
rhs = self.rhs.eval(env)
if isinstance(lhs, List) and isinstance(rhs, List):
return List(itertools.chain(lhs, rhs))
if isinstance(lhs, String) and isinstance(rhs, String):
return String(lhs + rhs)
if isinstance(lhs, Integer) and isinstance(rhs, Integer):
return Integer(int(lhs) + int(rhs))
raise TypeError('bad plus operands')
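# A minimal evaluation sketch: the expression 'x + "b"' parses into
# Concat(VarRef('x', ...), String('b')); if 'x' was previously assigned
# String('a'), evaluating the Concat under that environment yields
# String('ab').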
#------------------------------------------------------------------------------
# Parser
#------------------------------------------------------------------------------
class ParseError(ValueError):
"""Parser error exception class."""
def __init__(self, lexer, message):
"""Create a parser error exception object."""
super(ParseError, self).__init__(message)
self.message = message
self.line, self.column = \
Lexer.compute_line_column(lexer.buf, lexer.start)
def __str__(self):
"""Convert parser error to string representation."""
return 'ParseError: {}:{}: {}'.format(
self.line, self.column, self.message)
class Parser(object):
"""Parser to parse Android.bp files."""
def __init__(self, lexer, inherited_env=None):
"""Initialize the parser with the lexer."""
self.lexer = lexer
self.var_defs = []
self.vars = {} if inherited_env is None else dict(inherited_env)
self.modules = []
def parse(self):
"""Parse AST from tokens."""
lexer = self.lexer
while lexer.token != Token.EOF:
if lexer.token == Token.IDENT:
ident = self.parse_ident_lvalue()
if lexer.token in {Token.ASSIGN, Token.ASSIGNPLUS}:
self.parse_assign(ident, lexer.token)
elif lexer.token in {Token.LBRACE, Token.LPAREN}:
self.parse_module_definition(ident)
else:
raise ParseError(lexer,
'unexpected token ' + lexer.token.name)
else:
raise ParseError(lexer, 'unexpected token ' + lexer.token.name)
lexer.consume(Token.EOF)
def create_var_ref(self, name):
"""Create a variable reference."""
return VarRef(name, self.vars.get(name))
def define_var(self, name, value):
"""Define a variable."""
self.var_defs.append((name, value))
self.vars[name] = value
def parse_assign(self, ident, assign_token):
"""Parse an assignment statement."""
lexer = self.lexer
lexer.consume(assign_token)
value = self.parse_expression()
if assign_token == Token.ASSIGNPLUS:
value = Concat(self.create_var_ref(ident), value)
self.define_var(ident, value)
def parse_module_definition(self, module_ident):
"""Parse a module definition."""
properties = self.parse_dict()
properties['_path'] = String(self.lexer.path)
self.modules.append((module_ident, properties))
def parse_ident_lvalue(self):
"""Parse an identifier as an l-value."""
ident = self.lexer.literal
self.lexer.consume(Token.IDENT)
return ident
def parse_ident_rvalue(self):
"""Parse an identifier as a r-value.
Returns:
Returns VarRef if the literal is not 'true' nor 'false'.
Returns Bool(true/false) if the literal is either 'true' or 'false'.
"""
lexer = self.lexer
if lexer.literal in {'true', 'false'}:
result = Bool(lexer.literal == 'true')
else:
result = self.create_var_ref(lexer.literal)
lexer.consume(Token.IDENT)
return result
def parse_string(self):
"""Parse a string."""
lexer = self.lexer
string = String(lexer.literal)
lexer.consume(Token.STRING)
return string
def parse_integer(self):
"""Parse an integer."""
lexer = self.lexer
integer = Integer(int(lexer.literal))
lexer.consume(Token.INTEGER)
return integer
def parse_operand(self):
"""Parse an operand."""
lexer = self.lexer
token = lexer.token
if token == Token.STRING:
return self.parse_string()
if token == Token.IDENT:
return self.parse_ident_rvalue()
if token == Token.INTEGER:
return self.parse_integer()
if token == Token.LBRACKET:
return self.parse_list()
if token == Token.LBRACE:
return self.parse_dict()
if token == Token.LPAREN:
lexer.consume(Token.LPAREN)
operand = self.parse_expression()
lexer.consume(Token.RPAREN)
return operand
raise ParseError(lexer, 'unexpected token ' + token.name)
def parse_expression(self):
"""Parse an expression."""
lexer = self.lexer
expr = self.parse_operand()
while lexer.token == Token.PLUS:
lexer.consume(Token.PLUS)
expr = Concat(expr, self.parse_operand())
return expr
def parse_list(self):
"""Parse a list."""
result = List()
lexer = self.lexer
lexer.consume(Token.LBRACKET)
while lexer.token != Token.RBRACKET:
result.append(self.parse_expression())
if lexer.token == Token.COMMA:
lexer.consume(Token.COMMA)
lexer.consume(Token.RBRACKET)
return result
def parse_dict(self):
"""Parse a dict."""
result = Dict()
lexer = self.lexer
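        # Blueprint accepts two module styles (an observation from this
        # parser, not a spec reference): 'name { key: value }' and the legacy
        # function-call style 'name ( key = value )'.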
is_func_syntax = lexer.token == Token.LPAREN
if is_func_syntax:
lexer.consume(Token.LPAREN)
else:
lexer.consume(Token.LBRACE)
while lexer.token != Token.RBRACE and lexer.token != Token.RPAREN:
if lexer.token != Token.IDENT:
raise ParseError(lexer, 'unexpected token ' + lexer.token.name)
key = self.parse_ident_lvalue()
if lexer.token == Token.ASSIGN:
lexer.consume(Token.ASSIGN)
else:
lexer.consume(Token.COLON)
value = self.parse_expression()
result[key] = value
if lexer.token == Token.COMMA:
lexer.consume(Token.COMMA)
if is_func_syntax:
lexer.consume(Token.RPAREN)
else:
lexer.consume(Token.RBRACE)
return result
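# A minimal usage sketch of the parser (the module snippet is hypothetical):
#
#   parser = Parser(Lexer('cc_library { name: "foo" }', path='Android.bp'))
#   parser.parse()
#   ident, attrs = parser.modules[0]
#   # ident == 'cc_library' and attrs['name'] is String('foo')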
class RecursiveParser(object):
"""This is a recursive parser which will parse blueprint files
recursively."""
# Default Blueprint file name
_DEFAULT_SUB_NAME = 'Android.bp'
def __init__(self):
"""Initialize a recursive parser."""
self.visited = set()
self.modules = []
@staticmethod
def glob_sub_files(pattern, sub_file_name):
"""List the sub file paths that match with the pattern with
wildcards."""
for path in glob.glob(pattern):
if os.path.isfile(path):
if os.path.basename(path) == sub_file_name:
yield path
else:
sub_file_path = os.path.join(path, sub_file_name)
if os.path.isfile(sub_file_path):
yield sub_file_path
@classmethod
def find_sub_files_from_env(cls, rootdir, env, use_subdirs,
default_sub_name=_DEFAULT_SUB_NAME):
"""Find the sub files from the names specified in build, subdirs, and
optional_subdirs."""
subs = []
if 'build' in env:
subs.extend(os.path.join(rootdir, filename)
for filename in env['build'].eval(env))
if use_subdirs:
sub_name = env['subname'] if 'subname' in env else default_sub_name
if 'subdirs' in env:
for path in env['subdirs'].eval(env):
subs.extend(cls.glob_sub_files(os.path.join(rootdir, path),
sub_name))
if 'optional_subdirs' in env:
for path in env['optional_subdirs'].eval(env):
subs.extend(cls.glob_sub_files(os.path.join(rootdir, path),
sub_name))
return subs
@staticmethod
def _read_file(path, env):
"""Read a blueprint file and return modules and the environment."""
with open(path, 'r') as bp_file:
content = bp_file.read()
parser = Parser(Lexer(content, path=path), env)
parser.parse()
return (parser.modules, parser.vars)
def _parse_file(self, path, env, evaluate):
"""Parse a blueprint file and append to self.modules."""
modules, sub_env = self._read_file(path, env)
if evaluate:
modules = [(ident, attrs.eval(env)) for ident, attrs in modules]
self.modules += modules
return sub_env
def _parse_file_recursive(self, path, env, evaluate, use_subdirs):
"""Parse a blueprint file and recursively."""
self.visited.add(path)
sub_env = self._parse_file(path, env, evaluate)
rootdir = os.path.dirname(path)
sub_file_paths = self.find_sub_files_from_env(rootdir, sub_env,
use_subdirs)
sub_env.pop('build', None)
sub_env.pop('subdirs', None)
sub_env.pop('optional_subdirs', None)
for sub_file_path in sub_file_paths:
if sub_file_path not in self.visited:
self._parse_file_recursive(sub_file_path, sub_env, evaluate,
use_subdirs)
return sub_env
def _scan_and_parse_all_file_recursive(self, filename, path, env, evaluate):
"""Scan all files with the specified name and parse them."""
rootdir = os.path.dirname(path)
assert rootdir, 'rootdir is empty but must be non-empty'
envs = [(rootdir, env)]
assert env is not None
# Scan directories for all blueprint files
for basedir, dirnames, filenames in os.walk(rootdir):
# Drop irrelevant environments
while not basedir.startswith(envs[-1][0]):
envs.pop()
# Filter sub directories
if '.out-dir' in filenames:
# Stop at OUT_DIR
dirnames[:] = []
continue
new_dirnames = []
for name in dirnames:
if name in {'.git', '.repo'}:
continue
if basedir == rootdir and name == 'out':
continue
new_dirnames.append(name)
dirnames[:] = new_dirnames
# Parse blueprint files
if filename in filenames:
try:
path = os.path.join(basedir, filename)
sys.stdout.flush()
sub_env = self._parse_file_recursive(path, envs[-1][1],
evaluate, False)
assert sub_env is not None
envs.append((basedir, sub_env))
except IOError:
pass
def parse_file(self, path, env=None, evaluate=True,
default_sub_name=_DEFAULT_SUB_NAME):
"""Parse blueprint files recursively."""
if env is None:
env = {}
path = os.path.abspath(path)
sub_env = self._read_file(path, env)[1]
if 'subdirs' in sub_env or 'optional_subdirs' in sub_env:
self._parse_file_recursive(path, env, evaluate, True)
else:
self._scan_and_parse_all_file_recursive(
default_sub_name, path, env, evaluate)
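# A typical invocation (a sketch; the root path is a hypothetical example):
#
#   parser = RecursiveParser()
#   parser.parse_file('/path/to/aosp/Android.bp')
#   for ident, attrs in parser.modules:
#       print(ident, attrs.get('name'))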
#------------------------------------------------------------------------------
# Transformation
#------------------------------------------------------------------------------
def _build_named_modules_dict(modules):
"""Build a name-to-module dict."""
named_modules = {}
for i, (ident, attrs) in enumerate(modules):
name = attrs.get('name')
if name is not None:
named_modules[name] = [ident, attrs, i]
return named_modules
def _po_sorted_modules(modules, named_modules):
"""Sort modules in post order."""
modules = [(ident, attrs, i) for i, (ident, attrs) in enumerate(modules)]
# Build module dependency graph.
edges = {}
for ident, attrs, module_id in modules:
defaults = attrs.get('defaults')
if defaults:
edges[module_id] = set(
named_modules[default][2] for default in defaults)
# Traverse module graph in post order.
post_order = []
visited = set()
def _traverse(module_id):
visited.add(module_id)
for next_module_id in edges.get(module_id, []):
if next_module_id not in visited:
_traverse(next_module_id)
post_order.append(modules[module_id])
for module_id in range(len(modules)):
if module_id not in visited:
_traverse(module_id)
return post_order
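# For example (sketch): if module B lists module A in its 'defaults', the
# post order visits A before B, so B inherits attributes that were already
# resolved for A.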
def evaluate_default(attrs, default_attrs):
"""Add default attributes if the keys do not exist."""
for key, value in default_attrs.items():
if key not in attrs:
attrs[key] = value
else:
attrs_value = attrs[key]
if isinstance(value, Dict) and isinstance(attrs_value, Dict):
attrs[key] = evaluate_default(attrs_value, value)
return attrs
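# A worked example (sketch): with attrs {'name': 'foo'} and default_attrs
# {'name': 'bar', 'srcs': ['a.c']}, evaluate_default() keeps 'name': 'foo'
# and copies in 'srcs'; nested Dict values are merged recursively.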
def evaluate_defaults(modules):
"""Add default attributes to all modules if the keys do not exist."""
named_modules = _build_named_modules_dict(modules)
for ident, attrs, i in _po_sorted_modules(modules, named_modules):
for default in attrs.get('defaults', []):
attrs = evaluate_default(attrs, named_modules[default][1])
modules[i] = (ident, attrs)
return modules
def fill_module_namespaces(root_bp, modules):
"""Collect soong_namespace definition and set a `_namespace` property to
each module definitions."""
# Collect all namespaces
rootdir = os.path.dirname(os.path.abspath(root_bp))
namespaces = {rootdir}
for ident, attrs in modules:
if ident == 'soong_namespace':
namespaces.add(os.path.dirname(attrs['_path']))
# Build a path matcher for module namespaces
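    # (Reverse lexicographic order puts longer paths first, so the longest
    # matching namespace prefix wins in the alternation below.)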
namespaces = sorted(namespaces, reverse=True)
path_matcher = re.compile(
'|'.join('(' + re.escape(x) + '/.*)' for x in namespaces))
# Trim the root directory prefix
rootdir_prefix_len = len(rootdir) + 1
namespaces = [path[rootdir_prefix_len:] for path in namespaces]
# Fill in module namespaces
for ident, attrs in modules:
match = path_matcher.match(attrs['_path'])
attrs['_namespace'] = namespaces[match.lastindex - 1]
return modules
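# End-to-end usage sketch (the root path below is a hypothetical example):
#
#   parser = RecursiveParser()
#   parser.parse_file('/aosp/Android.bp')
#   modules = evaluate_defaults(parser.modules)
#   modules = fill_module_namespaces('/aosp/Android.bp', modules)
#   for ident, attrs in modules:
#       print(attrs['_namespace'], ident, attrs.get('name'))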