You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
228 lines
8.4 KiB
228 lines
8.4 KiB
#!/usr/bin/env python3
|
|
# Copyright 2019 Google Inc.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
#
|
|
################################################################################
|
|
"""Use git bisect to find the Clang/LLVM commit causing a regression."""
|
|
|
|
import logging
|
|
import os
|
|
import re
|
|
import shutil
|
|
import subprocess
|
|
import sys
|
|
|
|
|
|
def execute(command, *args, expect_zero=True, **kwargs):
    """Execute |command| and return the returncode, stdout and stderr.

    Args:
      command: The command to run, passed straight to subprocess.Popen (a
        list of strings, or a single string when shell=True is given).
      *args: Extra positional arguments forwarded to subprocess.Popen.
      expect_zero: If True, a nonzero exit status raises an error.
      **kwargs: Extra keyword arguments forwarded to subprocess.Popen. Any
        'stdout' or 'stderr' entry is overridden so output can be captured.

    Returns:
      A (returncode, stdout, stderr) tuple where stdout and stderr are str.

    Raises:
      subprocess.CalledProcessError: If |expect_zero| is True and the command
        exits with a nonzero status. The captured output is attached to the
        exception.
    """
    kwargs['stdout'] = subprocess.PIPE
    kwargs['stderr'] = subprocess.PIPE
    logging.debug('Running command: "%s"', str(command))
    process = subprocess.Popen(command, *args, **kwargs)
    stdout, stderr = process.communicate()
    # Build tools and fuzz targets may print bytes that are not valid UTF-8.
    # Replace them instead of letting a UnicodeDecodeError abort the bisection.
    stdout = stdout.decode('utf-8', errors='replace')
    stderr = stderr.decode('utf-8', errors='replace')
    retcode = process.returncode
    logging.info('Command: "%s" returned: %d.\nStdout: %s.\nStderr: %s',
                 str(command), retcode, stdout, stderr)
    if expect_zero and retcode != 0:
        raise subprocess.CalledProcessError(retcode,
                                            command,
                                            output=stdout,
                                            stderr=stderr)
    return retcode, stdout, stderr
|
|
|
|
|
|
def search_bisect_output(output):
    """Search |output| for a message indicating the culprit commit has been
    found.

    Args:
      output: The text printed by a 'git bisect good' or 'git bisect bad' run.

    Returns:
      The 40 character commit hash named in the first "<hash> is the first
      (good|bad) commit" message found anywhere in |output|, or None if there
      is no such message.
    """
    # TODO(metzman): Is it necessary to look for "good"?
    culprit_regex = re.compile(
        r'([a-z0-9]{40}) is the first (good|bad) commit')
    # Use search rather than match so the message is still found when other
    # text precedes it in the output.
    match = culprit_regex.search(output)
    return match.group(1) if match is not None else None
|
|
|
|
|
|
class GitRepo:
    """Runs git commands, including git bisect, against one repository."""

    def __init__(self, repo_dir):
        # Directory handed to 'git -C' for every command this object runs.
        self.repo_dir = repo_dir

    def do_command(self, git_subcommand):
        """Run git inside the repo with |git_subcommand| (a list of strings)
        appended, and return what execute() returns."""
        return execute(['git', '-C', self.repo_dir] + git_subcommand)

    def test_commit(self, test_command):
        """Build clang from the repo as currently checked out and run
        |test_command| through the shell.

        A zero exit status is reported to git as 'bisect good', anything else
        as 'bisect bad'. Returns the culprit commit if git's output announces
        one, otherwise None.
        """
        build_clang(self.repo_dir)
        exit_status, _, _ = execute(test_command,
                                    shell=True,
                                    expect_zero=False)
        verdict = 'good' if exit_status == 0 else 'bad'
        _, bisect_stdout, _ = self.do_bisect_command(verdict)
        return search_bisect_output(bisect_stdout)

    def bisect(self, good_commit, bad_commit, test_command):
        """Bisect between |good_commit| and |bad_commit| using |test_command|
        as the oracle. Return the culprit commit."""
        self.bisect_start(good_commit, bad_commit, test_command)
        # Keep testing commits until git reports the culprit.
        while True:
            culprit = self.test_commit(test_command)
            if culprit is not None:
                return culprit

    def bisect_start(self, good_commit, bad_commit, test_command):
        """Start a git bisect session and verify both endpoint commits."""
        self.do_bisect_command('start')
        # Do bad commit first since it is more likely to be recent.
        endpoints = ((bad_commit, 'bad'), (good_commit, 'good'))
        for commit, label in endpoints:
            self.test_start_commit(commit, label, test_command)

    def do_bisect_command(self, subcommand):
        """Run 'git bisect |subcommand|' (a string) and return the result."""
        return self.do_command(['bisect', subcommand])

    def test_start_commit(self, commit, label, test_command):
        """Check out and build |commit|, then confirm |test_command| behaves
        as |label| ('good' or 'bad') claims before marking it in git bisect.

        Raises BisectError if the observed exit status contradicts |label|.
        """
        assert label in ('good', 'bad'), label
        self.do_command(['checkout', commit])
        build_clang(self.repo_dir)
        exit_status, _, _ = execute(test_command,
                                    shell=True,
                                    expect_zero=False)
        details = (test_command, exit_status, commit)
        if label == 'good' and exit_status != 0:
            raise BisectError(
                'Test command "%s" returns %d on first good commit %s' %
                details)
        if label == 'bad' and exit_status == 0:
            raise BisectError(
                'Test command "%s" returns %d on first bad commit %s' %
                details)

        self.do_bisect_command(label)
|
|
|
|
|
|
class BisectError(Exception):
    """Raised when a problem is encountered while bisecting."""
|
|
|
|
|
|
def get_clang_build_env():
    """Return a copy of the current environment without CXXFLAGS and CFLAGS,
    for use when building Clang. os.environ itself is left unchanged."""
    dropped = ('CXXFLAGS', 'CFLAGS')
    return {
        name: value
        for name, value in os.environ.items()
        if name not in dropped
    }
|
|
|
|
|
|
def install_clang_build_deps():
    """Install the packages needed to build clang using apt-get."""
    packages = [
        'build-essential',
        'make',
        'cmake',
        'ninja-build',
        'git',
        'subversion',
        'g++-multilib',
    ]
    execute(['apt-get', 'install', '-y'] + packages)
|
|
|
|
|
|
def clone_with_retries(repo, local_path, num_retries=10):
    """Clone |repo| to |local_path| unless that directory already exists.

    Makes up to |num_retries| attempts, deleting whatever a failed attempt
    left at |local_path| before trying again. Returns None once the directory
    exists or a clone succeeds; raises Exception if every attempt fails.
    """
    if os.path.isdir(local_path):
        return

    clone_command = ['git', 'clone', repo, local_path]
    for _attempt in range(num_retries):
        # Remove anything a previous failed attempt left behind.
        if os.path.isdir(local_path):
            shutil.rmtree(local_path)
        exit_status, _, _ = execute(clone_command, expect_zero=False)
        if exit_status == 0:
            return

    raise Exception('Could not checkout %s.' % repo)
|
|
|
|
|
|
def get_clang_target_arch():
    """Return the LLVM target name to build clang for, based on the output
    of 'uname -m'. Raises Exception for any machine other than x86_64 or
    aarch64."""
    _, machine, _ = execute(['uname', '-m'])
    # (substring of the uname output, matching LLVM target name)
    known_targets = (
        ('x86_64', 'X86'),
        ('aarch64', 'AArch64'),
    )
    for marker, llvm_target in known_targets:
        if marker in machine:
            return llvm_target
    raise Exception('Unsupported target: %s.' % machine)
|
|
|
|
|
|
def prepare_build(llvm_project_path):
    """Create the llvm-build directory under $WORK if needed, run cmake in it
    for the LLVM sources in |llvm_project_path|, and return its path."""
    build_dir = os.path.join(os.getenv('WORK'), 'llvm-build')
    if not os.path.exists(build_dir):
        os.mkdir(build_dir)

    cmake_command = [
        'cmake',
        '-G',
        'Ninja',
        '-DLIBCXX_ENABLE_SHARED=OFF',
        '-DLIBCXX_ENABLE_STATIC_ABI_LIBRARY=ON',
        '-DLIBCXXABI_ENABLE_SHARED=OFF',
        '-DCMAKE_BUILD_TYPE=Release',
        '-DLLVM_ENABLE_PROJECTS=libcxx;libcxxabi;compiler-rt;clang',
        '-DLLVM_TARGETS_TO_BUILD=' + get_clang_target_arch(),
        os.path.join(llvm_project_path, 'llvm'),
    ]
    execute(cmake_command, env=get_clang_build_env(), cwd=build_dir)
    return build_dir
|
|
|
|
|
|
def build_clang(llvm_project_path):
    """Configure the build for the LLVM sources in |llvm_project_path|, then
    build and install Clang with ninja."""
    # TODO(metzman): Merge Python checkout and build code with
    # checkout_build_install_llvm.sh.
    # TODO(metzman): Look into speeding this process using ccache.
    # TODO(metzman): Make this program capable of handling MSAN and i386 Clang
    # regressions.
    build_dir = prepare_build(llvm_project_path)
    install_command = ['ninja', '-C', build_dir, 'install']
    execute(install_command, env=get_clang_build_env())
|
|
|
|
|
|
def find_culprit_commit(test_command, good_commit, bad_commit):
    """Return the LLVM commit that introduced the bug revealed by running
    |test_command|.

    Clones llvm-project into $SRC/llvm-project if that directory is missing,
    then bisects with |good_commit| as the known good commit and |bad_commit|
    as the known bad commit. The culprit is printed as well as returned.
    """
    checkout_dir = os.path.join(os.getenv('SRC'), 'llvm-project')
    clone_with_retries('https://github.com/llvm/llvm-project.git',
                       checkout_dir)
    culprit = GitRepo(checkout_dir).bisect(good_commit, bad_commit,
                                           test_command)
    print('Culprit commit', culprit)
    return culprit
|
|
|
|
|
|
def main():
    # pylint: disable=line-too-long
    """Finds the culprit LLVM commit that introduced a clang regression.

    Expects three command line arguments: the test command, the known good
    commit and the known bad commit, in that order.

    Can be tested using this command in a libsodium shell:
    python3 bisect_clang.py "cd /src/libsodium; make clean; cd -; compile && /out/secret_key_auth_fuzzer -runs=100" \
                            f7e52fbdb5a7af8ea0808e98458b497125a5eca1 \
                            8288453f6aac05080b751b680455349e09d49825

    Returns:
      0 once the bisection has completed, or 1 (after printing a usage message
      to stderr) when fewer than three arguments were supplied.
    """
    # pylint: enable=line-too-long
    # Fail with a usage message instead of an IndexError when arguments are
    # missing, and do so before installing packages or cloning anything.
    if len(sys.argv) < 4:
        program = sys.argv[0] if sys.argv else 'bisect_clang.py'
        print('Usage: %s <test_command> <good_commit> <bad_commit>' % program,
              file=sys.stderr)
        return 1

    # TODO(metzman): Check CFLAGS for things like -fsanitize=fuzzer-no-link.
    # TODO(metzman): Allow test_command to be optional and for just build.sh to be
    # used instead.
    # TODO(metzman): Add in more automation so that the script can automatically
    # determine the commits used in last Clang roll.
    test_command, good_commit, bad_commit = sys.argv[1:4]
    # TODO(metzman): Make verbosity configurable.
    logging.getLogger().setLevel(logging.DEBUG)
    install_clang_build_deps()
    find_culprit_commit(test_command, good_commit, bad_commit)
    return 0
|
|
|
|
|
|
# Run the bisection only when this file is executed as a script, using the
# value returned by main() as the process exit status.
if __name__ == '__main__':
    raise SystemExit(main())
|