You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
142 lines
4.5 KiB
142 lines
4.5 KiB
# -*- coding: utf-8 -*-
|
|
# The LLVM Compiler Infrastructure
|
|
#
|
|
# This file is distributed under the University of Illinois Open Source
|
|
# License. See LICENSE.TXT for details.
|
|
""" This module is responsible for parsing a compiler invocation. """
|
|
|
|
import re
|
|
import os
|
|
import collections
|
|
|
|
__all__ = ['split_command', 'classify_source', 'compiler_language']
|
|
|
|
# Ignored compiler options map for compilation database creation.
|
|
# The map is used in `split_command` method. (Which does ignore and classify
|
|
# parameters.) Please note, that these are not the only parameters which
|
|
# might be ignored.
|
|
#
|
|
# Keys are the option names; values are the number of following arguments to skip.
|
|
IGNORED_FLAGS = {
    # Compile-only flag: dropped because whoever creates the compilation
    # database sets it explicitly.
    '-c': 0,

    # Preprocessor flags: dropped because they would only produce duplicate
    # entries in the output (entries differing in nothing but these flags).
    # Users reported longer execution times caused by such duplicates.
    # ... flags without a parameter
    '-MD': 0, '-MMD': 0, '-MG': 0, '-MP': 0,
    # ... flags followed by one parameter
    '-MF': 1, '-MT': 1, '-MQ': 1,

    # Linker options: dropped because the compilation database holds
    # compilation commands only, so the compiler would ignore them anyway.
    # Removing them keeps the output readable.
    # ... flags without a parameter
    '-static': 0, '-shared': 0, '-s': 0, '-rdynamic': 0,
    # ... flags followed by one parameter
    '-l': 1, '-L': 1, '-u': 1, '-z': 1, '-T': 1, '-Xlinker': 1,
}
|
|
|
|
# Known C/C++ compiler executable name patterns
|
|
COMPILER_PATTERNS = frozenset(
    re.compile(expression) for expression in (
        # 'cc' or 'c++', optionally prefixed with 'intercept-' or 'analyze-'
        r'^(intercept-|analyze-|)c(c|\+\+)$',
        # any dash-terminated prefixes, then 'm' or 'g' followed by 'cc' or
        # '++', with an optional '-N', '-N.N' or '-N.N.N' version suffix
        r'^([^-]*-)*[mg](cc|\+\+)(-\d+(\.\d+){0,2})?$',
        # any dash-terminated prefixes, then 'clang' or 'clang++', with the
        # same optional version suffix
        r'^([^-]*-)*clang(\+\+)?(-\d+(\.\d+){0,2})?$',
        # 'llvm-gcc' or 'llvm-g++'
        r'^llvm-g(cc|\+\+)$',
    )
)
|
|
|
|
|
|
# Result type of `split_command`. Created once here, instead of building a
# brand new named tuple class on every call.
_Compilation = collections.namedtuple('Compilation',
                                      ['compiler', 'flags', 'files'])


def split_command(command):
    """ Returns a value when the command is a compilation, None otherwise.

    command: the full command as a sequence of strings; the first element
             is the executable, the rest are its arguments.

    The value on success is a named tuple with the following attributes:

    files:    list of source files
    flags:    list of compile options
    compiler: string value of 'c' or 'c++'

    None is returned when the executable is not a known C/C++ compiler,
    when an argument shows that no compilation pass is involved, or when
    no source file was found among the arguments.

    An option which expects a parameter but appears as the very last
    argument is tolerated: there is simply nothing left to skip or to
    attach, and no exception is raised. """

    compiler = compiler_language(command)
    # quit right now, if the program was not a C/C++ compiler
    if not compiler:
        return None

    flags = []
    files = []
    # iterate on the compile options; an explicit iterator is used because
    # some options consume the arguments which follow them
    args = iter(command[1:])
    for arg in args:
        # quit when compilation pass is not involved
        if arg in {'-E', '-S', '-cc1', '-M', '-MM', '-###'}:
            return None
        # ignore some flags, together with their parameters
        elif arg in IGNORED_FLAGS:
            for _ in range(IGNORED_FLAGS[arg]):
                # the default keeps a truncated command from raising
                # StopIteration out of this function
                next(args, None)
        # ignore '-l', '-L' and '-Wl,' options with the value attached
        elif re.match(r'^-(l|L|Wl,).+', arg):
            pass
        # some parameters could look like filename, take as compile option
        elif arg in {'-D', '-I'}:
            flags.append(arg)
            value = next(args, None)
            if value is not None:
                flags.append(value)
        # parameter which looks source file is taken...
        elif re.match(r'^[^-].+', arg) and classify_source(arg):
            files.append(arg)
        # and consider everything else as compile option.
        else:
            flags.append(arg)

    # do extra check on number of source files
    if not files:
        return None
    return _Compilation(compiler=compiler, flags=flags, files=files)
|
|
|
|
|
|
def classify_source(filename, c_compiler=True):
    """ Return the language from file name extension.

    filename:   name or path of the file; only the extension of its last
                path component is looked at (case sensitive).
    c_compiler: when true, '.c' and '.i' files are classified as C,
                otherwise as C++.

    Returns the language name as a string, or None when the extension
    is not a known one. """

    suffix = os.path.splitext(os.path.basename(filename))[1]

    # these two extensions depend on which kind of compiler is used
    if suffix == '.c':
        return 'c' if c_compiler else 'c++'
    if suffix == '.i':
        return 'c-cpp-output' if c_compiler else 'c++-cpp-output'

    # every spelling of a plain C++ source file
    if suffix in ('.C', '.cc', '.CC', '.cp', '.cpp', '.cxx', '.c++', '.C++',
                  '.txx'):
        return 'c++'

    # the remaining ones have a fixed language each
    others = {
        '.ii': 'c++-cpp-output',
        '.m': 'objective-c',
        '.mi': 'objective-c-cpp-output',
        '.mm': 'objective-c++',
        '.mii': 'objective-c++-cpp-output',
    }
    return others.get(suffix)
|
|
|
|
|
|
def compiler_language(command):
    """ A predicate to decide the command is a compiler call or not.

    command: the command as a sequence; its first element is the
             executable, of which only the base name is examined.

    Returns 'c' or 'c++' when the executable name matches one of the
    COMPILER_PATTERNS. None otherwise, an empty command included. """

    if not command:
        return None

    name = os.path.basename(command[0])
    for pattern in COMPILER_PATTERNS:
        if pattern.match(name):
            break
    else:
        # none of the known compiler name patterns matched
        return None

    # a name with '++' in it, optionally followed by a '-something' suffix,
    # is taken as a C++ compiler
    if re.match(r'^(.+)(\+\+)(-.+|)$', name):
        return 'c++'
    return 'c'
|