You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1793 lines
74 KiB
1793 lines
74 KiB
# Lint as: python2, python3
|
|
# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
|
|
# Use of this source code is governed by a BSD-style license that can be
|
|
# found in the LICENSE file.
|
|
|
|
from __future__ import absolute_import
|
|
from __future__ import division
|
|
from __future__ import print_function
|
|
|
|
import abc
|
|
import datetime
|
|
import difflib
|
|
import functools
|
|
import hashlib
|
|
import logging
|
|
import operator
|
|
import os
|
|
import re
|
|
import six
|
|
import sys
|
|
import warnings
|
|
|
|
import common
|
|
|
|
from autotest_lib.frontend.afe.json_rpc import proxy
|
|
from autotest_lib.client.common_lib import autotest_enum
|
|
from autotest_lib.client.common_lib import error
|
|
from autotest_lib.client.common_lib import global_config
|
|
from autotest_lib.client.common_lib import priorities
|
|
from autotest_lib.client.common_lib import time_utils
|
|
from autotest_lib.client.common_lib import utils
|
|
from autotest_lib.frontend.afe import model_attributes
|
|
from autotest_lib.frontend.afe.json_rpc import proxy
|
|
from autotest_lib.server.cros import provision
|
|
from autotest_lib.server.cros.dynamic_suite import constants
|
|
from autotest_lib.server.cros.dynamic_suite import control_file_getter
|
|
from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
|
|
from autotest_lib.server.cros.dynamic_suite import job_status
|
|
from autotest_lib.server.cros.dynamic_suite import suite_common
|
|
from autotest_lib.server.cros.dynamic_suite import tools
|
|
from autotest_lib.server.cros.dynamic_suite.job_status import Status
|
|
|
|
try:
|
|
from autotest_lib.server.cros.dynamic_suite import boolparse_lib
|
|
except ImportError as e:
|
|
print('Unable to import boolparse_lib: %s' % (e,))
|
|
print('This script must be either:')
|
|
print(' - Be run in the chroot.')
|
|
print(' - (not yet supported) be run after running ')
|
|
print(' ../utils/build_externals.py')
|
|
|
|
# Suite names associated with bug filing.
# NOTE(review): the consumers of this list are not visible in this part of
# the file; confirm how it is used before editing it.
_FILE_BUG_SUITES = [
        'au',
        'bvt',
        'bvt-cq',
        'bvt-inline',
        'paygen_au_beta',
        'paygen_au_canary',
        'paygen_au_dev',
        'paygen_au_stable',
        'sanity',
        'push_to_prod',
]

# Value of the 'drone_installation_directory' option in the SCHEDULER section
# of the global config, read once at import time.
_AUTOTEST_DIR = global_config.global_config.get_config_value(
        'SCHEDULER',
        'drone_installation_directory')
|
|
|
|
|
|
class RetryHandler(object):
    """Maintain retry information.

    @var _retry_map: A dictionary that stores retry history.
            The key is afe job id. The value is a dictionary.
            {job_id: {'state':RetryHandler.States, 'retry_max':int}}
            - state:
                The retry state of a job.
                NOT_ATTEMPTED:
                    We haven't done anything about the job.
                ATTEMPTED:
                    We've made an attempt to schedule a retry job. The
                    scheduling may or may not be successful, e.g.
                    it might encounter an rpc error. Note failure
                    in scheduling a retry is different from a retry job
                    failure.
                    For each job, we only attempt to schedule a retry once.
                    For example, assume we have a test with JOB_RETRIES=5 and
                    its second retry job failed. When we attempt to create
                    a third retry job to retry the second, we hit an rpc
                    error. In such case, we will give up on all following
                    retries.
                RETRIED:
                    A retry job has already been successfully
                    scheduled.
            - retry_max:
                The maximum of times the job can still
                be retried, taking into account retries
                that have occurred.
    @var _retry_level: A retry might be triggered only if the result
            is worse than the level.
    @var _max_retries: Maximum retry limit at suite level.
            Regardless how many times each individual test
            has been retried, the total number of retries happening in
            the suite can't exceed _max_retries.
    """

    States = autotest_enum.AutotestEnum('NOT_ATTEMPTED', 'ATTEMPTED', 'RETRIED',
                                        start_value=1, step=1)

    def __init__(self, initial_jobs_to_tests, retry_level='WARN',
                 max_retries=None):
        """Initialize RetryHandler.

        @param initial_jobs_to_tests: A dictionary that maps a job id to
                a ControlData object. This dictionary should contain
                jobs that are originally scheduled by the suite.
        @param retry_level: A retry might be triggered only if the result is
                worse than the level.
        @param max_retries: Integer, maximum total retries allowed
                for the suite. Default to None, no max.
        """
        self._retry_map = {}
        self._retry_level = retry_level
        # "No max" is represented by sys.maxsize so that the suite-level
        # budget can always be compared and decremented as an integer.
        self._max_retries = (max_retries
                             if max_retries is not None else sys.maxsize)
        for job_id, test in initial_jobs_to_tests.items():
            if test.job_retries > 0:
                self._add_job(new_job_id=job_id,
                              retry_max=test.job_retries)
            else:
                logging.debug("Test %s has no retries", test.name)

    def _add_job(self, new_job_id, retry_max):
        """Add a newly-created job to the retry map.

        @param new_job_id: The afe_job_id of a newly created job.
        @param retry_max: The maximum of times that we could retry
                the test if the job fails.

        @raises ValueError if new_job_id is already in retry map.
        """
        if new_job_id in self._retry_map:
            raise ValueError('add_job called when job is already in retry map.')

        self._retry_map[new_job_id] = {
                'state': self.States.NOT_ATTEMPTED,
                'retry_max': retry_max}

    def _suite_max_reached(self):
        """Return whether maximum retry limit for a suite has been reached."""
        return self._max_retries <= 0

    def add_retry(self, old_job_id, new_job_id):
        """Record a retry.

        Update retry map with the retry information. The map is left
        untouched if any of the documented errors is raised.

        @param old_job_id: The afe_job_id of the job that is retried.
        @param new_job_id: The afe_job_id of the retry job.

        @raises KeyError if old_job_id isn't in the retry map.
        @raises ValueError if we have already retried or made an attempt
                to retry the old job, or if new_job_id is already in the
                retry map.
        """
        old_record = self._retry_map[old_job_id]
        if old_record['state'] != self.States.NOT_ATTEMPTED:
            raise ValueError(
                    'We have already retried or attempted to retry job %d' %
                    old_job_id)
        # Register the new job before touching the old record: _add_job
        # raises ValueError on a duplicate id, and failing first keeps the
        # old record and the suite-level budget consistent.
        self._add_job(new_job_id=new_job_id,
                      retry_max=old_record['retry_max'] - 1)
        old_record['state'] = self.States.RETRIED
        self._max_retries -= 1

    def set_attempted(self, job_id):
        """Set the state of the job to ATTEMPTED.

        @param job_id: afe_job_id of a job.

        @raises KeyError if job_id isn't in the retry map.
        @raises ValueError if the current state is not NOT_ATTEMPTED.
        """
        current_state = self._retry_map[job_id]['state']
        if current_state != self.States.NOT_ATTEMPTED:
            # We are supposed to retry or attempt to retry each job
            # only once. Raise an error if this is not the case.
            raise ValueError('Unexpected state transition: %s -> %s' %
                             (self.States.get_string(current_state),
                              self.States.get_string(self.States.ATTEMPTED)))
        else:
            self._retry_map[job_id]['state'] = self.States.ATTEMPTED

    def has_following_retry(self, result):
        """Check whether there will be a following retry.

        We have the following cases for a given job id (result.id),
            - no retry map entry -> retry not required, no following retry
            - has retry map entry:
                - already retried -> has following retry
                - has not retried
                    (this branch can be handled by checking
                     should_retry(result))
                    - retry_max == 0 --> the last retry job, no more retry
                    - retry_max > 0
                       - attempted, but has failed in scheduling a
                         following retry due to rpc error  --> no more retry
                       - has not attempted --> has following retry if test
                         failed.

        @param result: A result, encapsulating the status of the job.

        @returns: True, if there will be a following retry.
                  False otherwise.
        """
        return (result.test_executed
                and result.id in self._retry_map
                and (self._retry_map[result.id]['state'] == self.States.RETRIED
                     or self._should_retry(result)))

    def _should_retry(self, result):
        """Check whether we should retry a job based on its result.

        We will retry the job that corresponds to the result
        when all of the following are true.
        a) The test was actually executed, meaning that if
           a job was aborted before it could ever reach the state
           of 'Running', the job will not be retried.
        b) The result is worse than |self._retry_level| which
           defaults to 'WARN'.
        c) The test requires retry, i.e. the job has an entry in the retry map.
        d) We haven't made any retry attempt yet, i.e. state == NOT_ATTEMPTED
           Note that if a test has JOB_RETRIES=5, and the second time
           it was retried it hit an rpc error, we will give up on
           all following retries.
        e) The job has not reached its retry max, i.e. retry_max > 0

        The suite-level retry budget must also not be exhausted.

        @param result: A result, encapsulating the status of the job.

        @returns: True if we should retry the job.
        """
        return (
            result.test_executed
            and result.id in self._retry_map
            and not self._suite_max_reached()
            and result.is_worse_than(
                job_status.Status(self._retry_level, '', 'reason'))
            and self._retry_map[result.id]['state'] == self.States.NOT_ATTEMPTED
            and self._retry_map[result.id]['retry_max'] > 0
        )

    def _should_retry_local_job(self, job_id):
        """Check whether we should retry a job based on information available
        for a local job without a Result object.

        We will retry the job that corresponds to the result
        when all of the following are true.
        a) The test requires retry, i.e. the job has an entry in the retry map.
        b) We haven't made any retry attempt yet for this job, i.e.
           state == NOT_ATTEMPTED
           If the job is aborted, we will give up on all following retries,
           regardless of max_retries.
        c) The job has not reached its retry max, i.e. retry_max > 0

        The suite-level retry budget is checked first.

        @param job_id: the id for the job, to look up relevant information.

        @returns: True if we should retry the job.
        """
        if self._suite_max_reached():
            logging.debug('suite max_retries reached, not retrying.')
            return False
        if job_id not in self._retry_map:
            logging.debug('job_id not in retry map, not retrying.')
            return False
        state = self._retry_map[job_id]['state']
        if state != self.States.NOT_ATTEMPTED:
            # Log the state's name rather than its raw enum integer so the
            # message is readable.
            logging.debug("job state was %s not 'Not Attempted', not retrying",
                          self.States.get_string(state))
            return False
        if self._retry_map[job_id]['retry_max'] <= 0:
            logging.debug('test-level retries exhausted, not retrying')
            return False
        return True

    def job_present(self, job_id):
        """Check whether a job id is present in the retry map.

        @param job_id: afe_job_id of a job.

        @returns: True if the job is present, False if not.
        """
        # Entries are only ever created by _add_job as non-empty dicts, so a
        # membership test is equivalent to checking the stored value.
        return job_id in self._retry_map

    def get_retry_max(self, job_id):
        """Get the maximum times the job can still be retried.

        @param job_id: afe_job_id of a job.

        @returns: An int, representing the maximum times the job can still be
                  retried.
        @raises KeyError if job_id isn't in the retry map.
        """
        return self._retry_map[job_id]['retry_max']
|
|
|
|
|
|
class _SuiteChildJobCreator(object):
    """Builds and submits the individual test jobs that make up a suite."""

    def __init__(
            self,
            tag,
            builds,
            board,
            afe=None,
            max_runtime_mins=24*60,
            timeout_mins=24*60,
            suite_job_id=None,
            ignore_deps=False,
            extra_deps=(),
            priority=priorities.Priority.DEFAULT,
            offload_failures_only=False,
            test_source_build=None,
            job_keyvals=None,
    ):
        """
        Constructor

        @param tag: a string with which to tag jobs run in this suite.
        @param builds: the builds on which we're running this suite.
        @param board: the board on which we're running this suite.
        @param afe: an instance of AFE as defined in server/frontend.py.
                    When falsy, a frontend_wrappers.RetryingAFE is created.
        @param max_runtime_mins: Maximum suite runtime, in minutes.
        @param timeout_mins: Maximum job lifetime, in minutes.
        @param suite_job_id: Job id that will act as parent id to all sub jobs.
                             Default: None
        @param ignore_deps: True if jobs should ignore the DEPENDENCIES
                            attribute and skip applying of dependency labels.
                            (Default:False)
        @param extra_deps: An iterable of strings which are the extra
                           DEPENDENCIES to add to each test being scheduled.
                           It is stored as a tuple.
        @param priority: Integer priority level.  Higher is more important.
        @param offload_failures_only: Only enable gs_offloading for failed
                                      jobs.
        @param test_source_build: Build that contains the server-side test code.
        @param job_keyvals: General job keyvals to be inserted into keyval file,
                            which will be used by tko/parse later.
        """
        if not afe:
            afe = frontend_wrappers.RetryingAFE(timeout_min=30,
                                                delay_sec=10,
                                                debug=False)
        self._afe = afe
        self._tag = tag
        self._builds = builds
        self._board = board
        self._max_runtime_mins = max_runtime_mins
        self._timeout_mins = timeout_mins
        self._suite_job_id = suite_job_id
        self._ignore_deps = ignore_deps
        self._extra_deps = tuple(extra_deps)
        self._priority = priority
        self._offload_failures_only = offload_failures_only
        self._test_source_build = test_source_build
        self._job_keyvals = job_keyvals

    @property
    def cros_build(self):
        """Return the CrOS build or the first build in the builds dict."""
        # TODO(ayatane): Note that the builds dict isn't ordered.  I'm not
        # sure what the implications of this are, but it's probably not a
        # good thing.
        fallback_build = list(self._builds.values())[0]
        return self._builds.get(provision.CROS_VERSION_PREFIX, fallback_build)

    def create_job(self, test, retry_for=None):
        """
        Thin wrapper around frontend.AFE.create_job().

        @param test: ControlData object for a test to run.
        @param retry_for: If the to-be-created job is a retry for an
                          old job, the afe_job_id of the old job will
                          be passed in as |retry_for|, which will be
                          recorded in the new job's keyvals.
        @returns: A frontend.Job object with an added test_name member.
                  test_name is used to preserve the higher level TEST_NAME
                  name of the job.
        """
        # For a system running multiple suites which share tests, the priority
        # overridden may lead to unexpected scheduling order that adds extra
        # provision jobs.  Only moblab lets the test raise the priority.
        if utils.is_moblab():
            test_priority = max(self._priority, test.priority)
        else:
            test_priority = self._priority

        # Fast tests skip the pre-job reboot and the reset step.
        if test.fast:
            reboot_before = model_attributes.RebootBefore.NEVER
        else:
            reboot_before = None

        job_name = tools.create_job_name(
                self._test_source_build or self.cros_build,
                self._tag,
                test.name)
        job_deps = self._create_job_deps(test)
        job_keyvals = self._create_keyvals_for_test_job(test, retry_for)

        test_obj = self._afe.create_job(
                control_file=test.text,
                name=job_name,
                control_type=test.test_type.capitalize(),
                meta_hosts=[self._board] * test.sync_count,
                dependencies=job_deps,
                keyvals=job_keyvals,
                max_runtime_mins=self._max_runtime_mins,
                timeout_mins=self._timeout_mins,
                parent_job_id=self._suite_job_id,
                reboot_before=reboot_before,
                run_reset=not test.fast,
                priority=test_priority,
                synch_count=test.sync_count,
                require_ssp=test.require_ssp)

        test_obj.test_name = test.name
        return test_obj

    def _create_job_deps(self, test):
        """Create job deps list for a test job.

        @param test: ControlData object whose dependencies are used unless
                     this creator was told to ignore them.
        @returns: A list of dependency strings, always ending with the
                  extra deps given at construction.
        """
        job_deps = [] if self._ignore_deps else list(test.dependencies)
        job_deps.extend(self._extra_deps)
        return job_deps

    def _create_keyvals_for_test_job(self, test, retry_for=None):
        """Create keyvals dict for creating a test job.

        @param test: ControlData object for a test to run.
        @param retry_for: If the to-be-created job is a retry for an
                          old job, the afe_job_id of the old job will
                          be passed in as |retry_for|, which will be
                          recorded in the new job's keyvals.
        @returns: A keyvals dict for creating the test job.
        """
        keyvals = {
            constants.JOB_BUILD_KEY: self.cros_build,
            constants.JOB_SUITE_KEY: self._tag,
            constants.JOB_EXPERIMENTAL_KEY: test.experimental,
            constants.JOB_BUILDS_KEY: self._builds
        }

        # test_source_build is saved to job_keyvals so scheduler can retrieve
        # the build name from database when compiling autoserv commandline.
        # This avoid a database change to add a new field in afe_jobs.
        #
        # Only add `test_source_build` to job keyvals if the build is different
        # from the CrOS build or the job uses more than one build, e.g., both
        # firmware and CrOS will be updated in the dut.
        # This is for backwards compatibility, so the update Autotest code can
        # compile an autoserv command line to run in a SSP container using
        # previous builds.
        source_build = self._test_source_build
        if source_build and (self.cros_build != source_build
                             or len(self._builds) > 1):
            keyvals[constants.JOB_TEST_SOURCE_BUILD_KEY] = source_build

        # Map firmware provisioning prefixes to the keyval that records them.
        firmware_keys = {
            provision.FW_RW_VERSION_PREFIX: constants.FWRW_BUILD,
            provision.FW_RO_VERSION_PREFIX: constants.FWRO_BUILD,
        }
        for prefix, build in six.iteritems(self._builds):
            if prefix in firmware_keys:
                keyvals[firmware_keys[prefix]] = build

        # Add suite job id to keyvals so tko parser can read it from keyval
        # file.
        if self._suite_job_id:
            keyvals[constants.PARENT_JOB_ID] = self._suite_job_id

        # We drop the old job's id in the new job's keyval file so that
        # later our tko parser can figure out the retry relationship and
        # invalidate the results of the old job in tko database.
        if retry_for:
            keyvals[constants.RETRY_ORIGINAL_JOB_ID] = retry_for

        if self._offload_failures_only:
            keyvals[constants.JOB_OFFLOAD_FAILURES_KEY] = True

        # Copy over only the keyvals that child jobs are meant to inherit.
        if self._job_keyvals:
            keyvals.update(
                    (key, self._job_keyvals[key])
                    for key in constants.INHERITED_KEYVALS
                    if key in self._job_keyvals)
        return keyvals
|
|
|
|
|
|
class _ControlFileRetriever(object):
|
|
"""Retrieves control files.
|
|
|
|
This returns control data instances, unlike control file getters
|
|
which simply return the control file text contents.
|
|
"""
|
|
|
|
def __init__(self, cf_getter, forgiving_parser=True, run_prod_code=False,
|
|
test_args=None):
|
|
"""Initialize instance.
|
|
|
|
@param cf_getter: a control_file_getter.ControlFileGetter used to list
|
|
and fetch the content of control files
|
|
@param forgiving_parser: If False, will raise ControlVariableExceptions
|
|
if any are encountered when parsing control
|
|
files. Note that this can raise an exception
|
|
for syntax errors in unrelated files, because
|
|
we parse them before applying the predicate.
|
|
@param run_prod_code: If true, the retrieved tests will run the test
|
|
code that lives in prod aka the test code
|
|
currently on the lab servers by disabling
|
|
SSP for the discovered tests.
|
|
@param test_args: A dict of args to be seeded in test control file under
|
|
the name |args_dict|.
|
|
"""
|
|
self._cf_getter = cf_getter
|
|
self._forgiving_parser = forgiving_parser
|
|
self._run_prod_code = run_prod_code
|
|
self._test_args = test_args
|
|
|
|
|
|
def retrieve_for_test(self, test_name):
|
|
"""Retrieve a test's control data.
|
|
|
|
This ignores forgiving_parser because we cannot return a
|
|
forgiving value.
|
|
|
|
@param test_name: Name of test to retrieve.
|
|
|
|
@raises ControlVariableException: There is a syntax error in a
|
|
control file.
|
|
|
|
@returns a ControlData object
|
|
"""
|
|
return suite_common.retrieve_control_data_for_test(
|
|
self._cf_getter, test_name)
|
|
|
|
|
|
def retrieve_for_suite(self, suite_name=''):
|
|
"""Scan through all tests and find all tests.
|
|
|
|
@param suite_name: If specified, this method will attempt to restrain
|
|
the search space to just this suite's control files.
|
|
|
|
@raises ControlVariableException: If forgiving_parser is False and there
|
|
is a syntax error in a control file.
|
|
|
|
@returns a dictionary of ControlData objects that based on given
|
|
parameters.
|
|
"""
|
|
tests = suite_common.retrieve_for_suite(
|
|
self._cf_getter, suite_name, self._forgiving_parser,
|
|
self._test_args)
|
|
if self._run_prod_code:
|
|
for test in six.itervalues(tests):
|
|
test.require_ssp = False
|
|
|
|
return tests
|
|
|
|
|
|
def list_all_suites(build, devserver, cf_getter=None):
    """
    Collect every suite name mentioned by the parsed control files.

    All tests found through the getter are accepted (subject to the defaults
    of find_and_parse_tests) and their suite_tag_parts are merged.

    @param build: the build on which we're running this suite.
    @param devserver: the devserver which contains the build.
    @param cf_getter: control_file_getter.ControlFileGetter. Defaults to
                      using DevServerGetter.

    @return list of suites, without duplicates and in no particular order.
    """
    def _accept_all(_control_data):
        """Predicate that keeps every test."""
        return True

    getter = cf_getter
    if getter is None:
        getter = _create_ds_getter(build, devserver)

    suite_names = {
            suite_name
            for control_data in find_and_parse_tests(getter, _accept_all)
            for suite_name in control_data.suite_tag_parts}
    return list(suite_names)
|
|
|
|
|
|
def test_file_similarity_predicate(test_file_pattern):
|
|
"""Returns predicate that gets the similarity based on a test's file
|
|
name pattern.
|
|
|
|
Builds a predicate that takes in a parsed control file (a ControlData)
|
|
and returns a tuple of (file path, ratio), where ratio is the
|
|
similarity between the test file name and the given test_file_pattern.
|
|
|
|
@param test_file_pattern: regular expression (string) to match against
|
|
control file names.
|
|
@return a callable that takes a ControlData and and returns a tuple of
|
|
(file path, ratio), where ratio is the similarity between the
|
|
test file name and the given test_file_pattern.
|
|
"""
|
|
return lambda t: ((None, 0) if not hasattr(t, 'path') else
|
|
(t.path, difflib.SequenceMatcher(a=t.path,
|
|
b=test_file_pattern).ratio()))
|
|
|
|
|
|
def test_name_similarity_predicate(test_name):
|
|
"""Returns predicate that matched based on a test's name.
|
|
|
|
Builds a predicate that takes in a parsed control file (a ControlData)
|
|
and returns a tuple of (test name, ratio), where ratio is the similarity
|
|
between the test name and the given test_name.
|
|
|
|
@param test_name: the test name to base the predicate on.
|
|
@return a callable that takes a ControlData and returns a tuple of
|
|
(test name, ratio), where ratio is the similarity between the
|
|
test name and the given test_name.
|
|
"""
|
|
return lambda t: ((None, 0) if not hasattr(t, 'name') else
|
|
(t.name,
|
|
difflib.SequenceMatcher(a=t.name, b=test_name).ratio()))
|
|
|
|
|
|
def matches_attribute_expression_predicate(test_attr_boolstr):
    """Returns predicate that matches based on boolean expression of
    attributes.

    The returned callable takes a parsed control file (a ControlData) and
    passes its `attributes` together with |test_attr_boolstr| to
    boolparse_lib.BoolstrResult.

    @param test_attr_boolstr: boolean expression of the attributes to be
                              tested, like 'system:all and interval:daily'.

    @return a callable that takes a ControlData and returns the result of
            evaluating the boolean expression against the test attributes.
    """
    def _attributes_match(control_data):
        """Evaluate the expression for one control data object."""
        return boolparse_lib.BoolstrResult(test_attr_boolstr,
                                           control_data.attributes)

    return _attributes_match
|
|
|
|
|
|
def test_file_matches_pattern_predicate(test_file_pattern):
|
|
"""Returns predicate that matches based on a test's file name pattern.
|
|
|
|
Builds a predicate that takes in a parsed control file (a ControlData)
|
|
and returns True if the test's control file name matches the given
|
|
regular expression.
|
|
|
|
@param test_file_pattern: regular expression (string) to match against
|
|
control file names.
|
|
@return a callable that takes a ControlData and and returns
|
|
True if control file name matches the pattern.
|
|
"""
|
|
return lambda t: hasattr(t, 'path') and re.match(test_file_pattern,
|
|
t.path)
|
|
|
|
|
|
def test_name_matches_pattern_predicate(test_name_pattern):
|
|
"""Returns predicate that matches based on a test's name pattern.
|
|
|
|
Builds a predicate that takes in a parsed control file (a ControlData)
|
|
and returns True if the test name matches the given regular expression.
|
|
|
|
@param test_name_pattern: regular expression (string) to match against
|
|
test names.
|
|
@return a callable that takes a ControlData and returns
|
|
True if the name fields matches the pattern.
|
|
"""
|
|
return lambda t: hasattr(t, 'name') and re.match(test_name_pattern,
|
|
t.name)
|
|
|
|
|
|
def test_name_equals_predicate(test_name):
|
|
"""Returns predicate that matched based on a test's name.
|
|
|
|
Builds a predicate that takes in a parsed control file (a ControlData)
|
|
and returns True if the test name is equal to |test_name|.
|
|
|
|
@param test_name: the test name to base the predicate on.
|
|
@return a callable that takes a ControlData and looks for |test_name|
|
|
in that ControlData's name.
|
|
"""
|
|
return lambda t: hasattr(t, 'name') and test_name == t.name
|
|
|
|
|
|
def name_in_tag_similarity_predicate(name):
    """Returns predicate that takes a control file and gets the similarity
    of the suites in the control file and the given name.

    The returned callable takes a parsed control file (a ControlData) and
    scores each entry of its suite_tag_parts against |name| with
    difflib.SequenceMatcher.

    @param name: the suite name to base the predicate on.
    @return a callable that takes a ControlData and returns a list of tuples
            of (suite name, ratio), where suite name is each suite listed in
            the control file, and ratio is the similarity between each suite
            and the given name. When no suite is listed the result is
            [(None, 0)].
    """
    def _suite_similarities(control_data):
        """Score every suite tag of one control data object."""
        scored = []
        for suite in control_data.suite_tag_parts:
            ratio = difflib.SequenceMatcher(a=suite, b=name).ratio()
            scored.append((suite, ratio))
        if not scored:
            # Placeholder so callers always get at least one pair.
            return [(None, 0)]
        return scored

    return _suite_similarities
|
|
|
|
|
|
def name_in_tag_predicate(name):
    """Returns predicate that takes a control file and looks for |name|.

    This delegates to suite_common.name_in_tag_predicate. Per the original
    documentation, the resulting predicate takes a parsed control file
    (a ControlData) and returns True if the SUITE tag is present and
    contains |name|.

    @param name: the suite name to base the predicate on.
    @return a callable that takes a ControlData and looks for |name| in that
            ControlData object's suite member.
    """
    predicate = suite_common.name_in_tag_predicate(name)
    return predicate
|
|
|
|
|
|
def create_fs_getter(autotest_dir):
    """
    Build a file system control file getter rooted at |autotest_dir|.

    @param autotest_dir: the place to find autotests.
    @return a FileSystemGetter instance that looks under |autotest_dir|.
    """
    # currently hard-coded places to look for tests.
    test_subpaths = ('server/site_tests', 'client/site_tests',
                     'server/tests', 'client/tests')
    directories = []
    for subpath in test_subpaths:
        directories.append(os.path.join(autotest_dir, subpath))
    return control_file_getter.FileSystemGetter(directories)
|
|
|
|
|
|
def _create_ds_getter(build, devserver):
    """
    Build a devserver-backed control file getter.

    @param build: the build on which we're running this suite.
    @param devserver: the devserver which contains the build.
    @return a control_file_getter.DevServerGetter instance constructed from
            |build| and |devserver|.
    """
    ds_getter = control_file_getter.DevServerGetter(build, devserver)
    return ds_getter
|
|
|
|
|
|
def _non_experimental_tests_predicate(test_data):
|
|
"""Test predicate for non-experimental tests."""
|
|
return not test_data.experimental
|
|
|
|
|
|
def find_and_parse_tests(cf_getter, predicate, suite_name='',
                         add_experimental=False, forgiving_parser=True,
                         run_prod_code=False, test_args=None):
    """
    Function to scan through all tests and find eligible tests.

    Retrieves the control data reachable through |cf_getter| (optionally
    narrowed by |suite_name|) and filters it with |predicate|. Unless
    |add_experimental| is set, the predicate is combined with one that
    rejects experimental tests.

    @param cf_getter: a control_file_getter.ControlFileGetter used to list
        and fetch the content of control files
    @param predicate: a function that should return True when run over a
        ControlData representation of a control file that should be in
        this Suite.
    @param suite_name: If specified, this method will attempt to restrain
                       the search space to just this suite's control files.
    @param add_experimental: add tests with experimental attribute set.
    @param forgiving_parser: If False, will raise ControlVariableExceptions
                             if any are encountered when parsing control
                             files. Note that this can raise an exception
                             for syntax errors in unrelated files, because
                             we parse them before applying the predicate.
    @param run_prod_code: If true, the suite will run the test code that
                          lives in prod aka the test code currently on the
                          lab servers by disabling SSP for the discovered
                          tests.
    @param test_args: A dict of args to be seeded in test control file.

    @raises ControlVariableException: If forgiving_parser is False and there
                                      is a syntax error in a control file.

    @return the result of suite_common.filter_tests; per the original
            documentation this is a list of ControlData objects that should
            be run, with control file text added in |text| attribute, sorted
            based on the TIME setting in control file, slowest test comes
            first (the filtering and sorting code is not visible here).
    """
    logging.debug('Getting control file list for suite: %s', suite_name)
    retriever_options = {
            'forgiving_parser': forgiving_parser,
            'run_prod_code': run_prod_code,
            'test_args': test_args,
    }
    retriever = _ControlFileRetriever(cf_getter, **retriever_options)
    candidates = retriever.retrieve_for_suite(suite_name)

    if add_experimental:
        effective_predicate = predicate
    else:
        effective_predicate = _ComposedPredicate(
                [predicate, _non_experimental_tests_predicate])
    return suite_common.filter_tests(candidates, effective_predicate)
|
|
|
|
|
|
def find_possible_tests(cf_getter, predicate, suite_name='', count=10):
    """
    Function to scan through all tests and find possible tests.

    Retrieves the control data reachable through |cf_getter| (optionally
    narrowed by |suite_name|), scores each one with |predicate| and returns
    the |count| best-scoring names.

    @param cf_getter: a control_file_getter.ControlFileGetter used to list
        and fetch the content of control files
    @param predicate: a function that should return a tuple of (name, ratio)
        when run over a ControlData representation of a control file that
        should be in this Suite. `name` is the key to be compared, e.g.,
        a suite name or test name. `ratio` is a value between [0,1]
        indicating the similarity of `name` and the value to be compared.
        A list of such tuples is accepted as well.
    @param suite_name: If specified, this method will attempt to restrain
                       the search space to just this suite's control files.
    @param count: Number of suggestions to return, default to 10.

    @return list of top names that similar to the given test, sorted by
            match ratio (best first). When a name is scored more than once
            the last score wins. A predicate's (None, 0) placeholder is kept
            like any other entry, so None may appear in the result.
    """
    logging.debug('Getting control file list for suite: %s', suite_name)
    tests = _ControlFileRetriever(cf_getter).retrieve_for_suite(suite_name)
    logging.debug('Parsed %s control files.', len(tests))

    ratio_by_name = {}
    for control_data in six.itervalues(tests):
        scored = predicate(control_data)
        # Some predicates may return a list of tuples, e.g.,
        # name_in_tag_similarity_predicate. Convert all returns to a list.
        scored_pairs = scored if isinstance(scored, list) else [scored]
        for name, ratio in scored_pairs:
            ratio_by_name[name] = ratio

    ranked = sorted(ratio_by_name.items(), key=operator.itemgetter(1),
                    reverse=True)
    return [pair[0] for pair in ranked][:count]
|
|
|
|
|
|
def _deprecated_suite_method(func):
|
|
"""Decorator for deprecated Suite static methods.
|
|
|
|
TODO(ayatane): This is used to decorate functions that are called as
|
|
static methods on Suite.
|
|
"""
|
|
@functools.wraps(func)
|
|
def wrapper(*args, **kwargs):
|
|
"""Wraps |func| for warning."""
|
|
warnings.warn('Calling method "%s" from Suite is deprecated' %
|
|
func.__name__)
|
|
return func(*args, **kwargs)
|
|
return staticmethod(wrapper)
|
|
|
|
|
|
class _BaseSuite(object):
    """
    A suite of tests that are scheduled as child jobs and then waited on.

    Given the tests to run, can fire off one job per test, optionally retry
    failed jobs, wait for results and report failures through a result
    reporter.

    @var tests: list of the tests (ControlData objects) in this suite.
    @var wait_for_results: the |wait_for_results| constructor argument.
    @var _tag: a string with which to tag jobs run in this suite.
    @var _builds: the builds on which we're running this suite.
    @var _results_dir: directory that keyvals are written to, if any.
    @var _afe: an instance of AFE as defined in server/frontend.py.
    @var _tko: an instance of TKO as defined in server/frontend.py.
    @var _jobs: currently scheduled jobs, if any.
    @var _jobs_to_tests: a dictionary that maps job ids to tests represented
                         ControlData objects.
    @var _dependencies: tuple made of |extra_deps|, the board, the pool
                        (if any) and |child_dependencies|.
    @var _job_retry: a bool value indicating whether jobs should be retried
                     on failure.
    @var _retry_handler: a RetryHandler object; None until schedule() runs
                         with retries enabled.
    @var _result_reporter: the _ResultReporter used to report failures.
    """

    def __init__(
            self,
            tests,
            tag,
            builds,
            board,
            afe=None,
            tko=None,
            pool=None,
            results_dir=None,
            max_runtime_mins=24*60,
            timeout_mins=24*60,
            file_bugs=False,
            suite_job_id=None,
            ignore_deps=False,
            extra_deps=None,
            priority=priorities.Priority.DEFAULT,
            wait_for_results=True,
            job_retry=False,
            max_retries=sys.maxsize,
            offload_failures_only=False,
            test_source_build=None,
            job_keyvals=None,
            child_dependencies=(),
            result_reporter=None,
    ):
        """Initialize instance.

        @param tests: Iterable of tests to run.
        @param tag: a string with which to tag jobs run in this suite.
        @param builds: the builds on which we're running this suite.
        @param board: the board on which we're running this suite.
        @param afe: an instance of AFE as defined in server/frontend.py.
        @param tko: an instance of TKO as defined in server/frontend.py.
        @param pool: Specify the pool of machines to use for scheduling
                purposes.
        @param results_dir: The directory where the job can write results to.
                            This must be set if you want job_id of sub-jobs
                            list in the job keyvals.
        @param max_runtime_mins: Maximum suite runtime, in minutes.
        @param timeout_mins: Maximum job lifetime, in minutes.
        @param file_bugs: If True, failures that pass _should_report() are
                          handed to the result reporter.
        @param suite_job_id: Job id that will act as parent id to all sub jobs.
                             Default: None
        @param ignore_deps: True if jobs should ignore the DEPENDENCIES
                            attribute and skip applying of dependency labels.
                            (Default:False)
        @param extra_deps: A list of strings which are the extra DEPENDENCIES
                           to add to each test being scheduled. The list is
                           copied and never modified.
        @param priority: Integer priority level.  Higher is more important.
        @param wait_for_results: Set to False to run the suite job without
                                 waiting for test jobs to finish. Default is
                                 True.
        @param job_retry: A bool value indicating whether jobs should be retried
                          on failure. If True, the field 'JOB_RETRIES' in
                          control files will be respected. If False, do not
                          retry.
        @param max_retries: Maximum retry limit at suite level.
                            Regardless how many times each individual test
                            has been retried, the total number of retries
                            happening in the suite can't exceed _max_retries.
                            Default to sys.maxsize.
        @param offload_failures_only: Only enable gs_offloading for failed
                                      jobs.
        @param test_source_build: Build that contains the server-side test code.
        @param job_keyvals: General job keyvals to be inserted into keyval file,
                            which will be used by tko/parse later.
        @param child_dependencies: (optional) list of dependency strings
                to be added as dependencies to child jobs.
        @param result_reporter: A _ResultReporter instance to report results. If
                None, an _EmailReporter will be created.
        """

        self.tests = list(tests)
        self._tag = tag
        self._builds = builds
        self._results_dir = results_dir
        self._afe = afe or frontend_wrappers.RetryingAFE(timeout_min=30,
                                                         delay_sec=10,
                                                         debug=False)
        self._tko = tko or frontend_wrappers.RetryingTKO(timeout_min=30,
                                                         delay_sec=10,
                                                         debug=False)
        self._jobs = []
        self._jobs_to_tests = {}

        self._file_bugs = file_bugs
        self._suite_job_id = suite_job_id
        self._job_retry = job_retry
        self._max_retries = max_retries
        # RetryHandler to be initialized in schedule()
        self._retry_handler = None
        self.wait_for_results = wait_for_results
        self._job_keyvals = job_keyvals
        if result_reporter is None:
            self._result_reporter = _EmailReporter(self)
        else:
            self._result_reporter = result_reporter

        # Work on a private copy so that the board/pool/child dependencies
        # appended below never leak into the list owned by the caller.
        extra_deps = [] if extra_deps is None else list(extra_deps)
        extra_deps.append(board)
        if pool:
            extra_deps.append(pool)
        extra_deps.extend(child_dependencies)
        self._dependencies = tuple(extra_deps)

        # NOTE(review): the raw |afe| argument (possibly None) is forwarded
        # here rather than self._afe; presumably the job creator builds its
        # own default AFE -- confirm against _SuiteChildJobCreator.
        self._job_creator = _SuiteChildJobCreator(
            tag=tag,
            builds=builds,
            board=board,
            afe=afe,
            max_runtime_mins=max_runtime_mins,
            timeout_mins=timeout_mins,
            suite_job_id=suite_job_id,
            ignore_deps=ignore_deps,
            extra_deps=extra_deps,
            priority=priority,
            offload_failures_only=offload_failures_only,
            test_source_build=test_source_build,
            job_keyvals=job_keyvals,
        )

    def _schedule_test(self, record, test, retry_for=None):
        """Schedule a single test and return the job.

        Schedule a single test by creating a job, and then update relevant
        data structures that are used to keep track of all running jobs.

        Emits a TEST_NA status log entry if it failed to schedule the test due
        to NoEligibleHostException or a non-existent board label.

        Returns a frontend.Job object if the test is successfully scheduled.
        If scheduling failed due to NoEligibleHostException or a non-existent
        board label, returns None.

        @param record: A callable to use for logging.
                       prototype: record(base_job.status_log_entry)
        @param test: ControlData for a test to run.
        @param retry_for: If we are scheduling a test to retry an
                          old job, the afe_job_id of the old job
                          will be passed in as |retry_for|.

        @returns: A frontend.Job object or None
        @raises proxy.ValidationError: if job creation fails validation for
                a reason other than a non-existent board label.
        @raises error.RPCException, proxy.JSONRPCException: re-raised after
                a retry attempt, if any, has been marked as attempted.
        """
        msg = 'Scheduling %s' % test.name
        if retry_for:
            msg = msg + ', to retry afe job %d' % retry_for
        logging.debug(msg)
        begin_time_str = datetime.datetime.now().strftime(time_utils.TIME_FMT)
        try:
            job = self._job_creator.create_job(test, retry_for=retry_for)
        except (error.NoEligibleHostException, proxy.ValidationError) as e:
            # Only ValidationErrors can reach the second operand, so the
            # helper decides whether this one is a missing board label.
            not_applicable = (
                isinstance(e, error.NoEligibleHostException)
                or _is_nonexistent_board_error(e))
            if not not_applicable:
                # Bare raise keeps the original traceback on Python 2 too.
                raise
            # Treat a dependency on a non-existent board label the same as
            # a dependency on a board that exists, but for which there's no
            # hardware.
            logging.debug('%s not applicable for this board/pool. '
                          'Emitting TEST_NA.', test.name)
            Status('TEST_NA', test.name,
                   'Skipping: test not supported on this board/pool.',
                   begin_time_str=begin_time_str).record_all(record)
            return None
        except (error.RPCException, proxy.JSONRPCException):
            if retry_for:
                # Mark that we've attempted to retry the old job.
                logging.debug("RPC exception occurred")
                self._retry_handler.set_attempted(job_id=retry_for)
            raise
        else:
            self._jobs.append(job)
            self._jobs_to_tests[job.id] = test
            if retry_for:
                # A retry job was just created, record it.
                self._retry_handler.add_retry(
                        old_job_id=retry_for, new_job_id=job.id)
                retry_count = (test.job_retries -
                               self._retry_handler.get_retry_max(job.id))
                logging.debug('Job %d created to retry job %d. '
                              'Have retried for %d time(s)',
                              job.id, retry_for, retry_count)
            self._remember_job_keyval(job)
            return job

    def schedule(self, record):
        """
        Schedule jobs using |self._afe|.

        frontend.Job objects representing each scheduled job will be put in
        |self._jobs|.

        Any exception raised while scheduling is logged and recorded as a
        FAIL status for the suite instead of being propagated.

        @param record: A callable to use for logging.
                       prototype: record(base_job.status_log_entry)
        @returns: The number of tests that were scheduled.
        """
        scheduled_test_names = []
        logging.debug('Discovered %d tests.', len(self.tests))

        Status('INFO', 'Start %s' % self._tag).record_result(record)
        try:
            # Write job_keyvals into keyval file.
            if self._job_keyvals:
                utils.write_keyval(self._results_dir, self._job_keyvals)

            # TODO(crbug.com/730885): This is a hack to protect tests that are
            # not usually retried from getting hit by a provision error when run
            # as part of a suite. Remove this hack once provision is separated
            # out in its own suite.
            self._bump_up_test_retries(self.tests)
            for test in self.tests:
                scheduled_job = self._schedule_test(record, test)
                if scheduled_job is not None:
                    scheduled_test_names.append(test.name)

            # Write the num of scheduled tests and name of them to keyval file.
            logging.debug('Scheduled %d tests, writing the total to keyval.',
                          len(scheduled_test_names))
            utils.write_keyval(
                self._results_dir,
                self._make_scheduled_tests_keyvals(scheduled_test_names))
        except Exception:
            logging.exception('Exception while scheduling suite')
            Status('FAIL', self._tag,
                   'Exception while scheduling suite').record_result(record)

        if self._job_retry:
            logging.debug("Initializing RetryHandler for suite %s.", self._tag)
            self._retry_handler = RetryHandler(
                    initial_jobs_to_tests=self._jobs_to_tests,
                    max_retries=self._max_retries)
            logging.debug("retry map created: %s ",
                          self._retry_handler._retry_map)
        else:
            logging.info("Will not retry jobs from suite %s.", self._tag)
        return len(scheduled_test_names)

    def _bump_up_test_retries(self, tests):
        """Bump up individual test retries to match suite retry options.

        Does nothing unless the suite was created with job_retry enabled.

        @param tests: Iterable of tests; each test whose job_retries is None
                      gets job_retries set to 1.
        """
        if not self._job_retry:
            return

        for test in tests:
            # We do honor if a test insists on JOB_RETRIES = 0.
            if test.job_retries is None:
                logging.debug(
                        'Test %s did not request retries, but suite requires '
                        'retries. Bumping retries up to 1. '
                        '(See crbug.com/730885)',
                        test.name)
                test.job_retries = 1

    def _make_scheduled_tests_keyvals(self, scheduled_test_names):
        """Make a keyvals dict to write for scheduled test names.

        @param scheduled_test_names: A list of scheduled test name strings.

        @returns: A keyvals dict holding the number of names and their repr.
        """
        return {
            constants.SCHEDULED_TEST_COUNT_KEY: len(scheduled_test_names),
            constants.SCHEDULED_TEST_NAMES_KEY: repr(scheduled_test_names),
        }

    def _should_report(self, result):
        """
        Returns True if this failure requires to be reported.

        @param result: A result, encapsulating the status of the failed job.
        @return: True if we should report this failure.
        """
        return (self._file_bugs and result.test_executed and
                not result.is_testna() and
                result.is_worse_than(job_status.Status('GOOD', '', 'reason')))

    def _has_retry(self, result):
        """
        Return True if this result gets to retry.

        @param result: A result, encapsulating the status of the failed job.
        @return: bool
        """
        return (self._job_retry
                and self._retry_handler.has_following_retry(result))

    def wait(self, record):
        """
        Polls for the job statuses, using |record| to print status when each
        completes.

        Any exception raised while waiting is logged and recorded as a FAIL
        status for the suite instead of being propagated.

        @param record: callable that records job status.
                 prototype:
                   record(base_job.status_log_entry)
        """
        waiter = job_status.JobResultWaiter(self._afe, self._tko)
        try:
            if self._suite_job_id:
                jobs = self._afe.get_jobs(parent_job_id=self._suite_job_id)
            else:
                logging.warning('Unknown suite_job_id, falling back to less '
                                'efficient results_generator.')
                jobs = self._jobs
            waiter.add_jobs(jobs)
            for result in waiter.wait_for_results():
                self._handle_result(result=result, record=record, waiter=waiter)
                if self._finished_waiting():
                    break
        except Exception:  # pylint: disable=W0703
            logging.exception('Exception waiting for results')
            Status('FAIL', self._tag,
                   'Exception waiting for results').record_result(record)

    def _finished_waiting(self):
        """Return whether the suite is finished waiting for child jobs.

        The base implementation always returns False, so wait() only stops
        when the waiter runs out of results.
        """
        return False

    def _handle_result(self, result, record, waiter):
        """
        Handle a test job result.

        Records the result, schedules a retry when the retry handler asks
        for one, and otherwise reports the result if it should be reported.

        @param result: Status instance for job.
        @param record: callable that records job status.
                 prototype:
                   record(base_job.status_log_entry)
        @param waiter: JobResultsWaiter instance.
        """
        self._record_result(result, record)
        rescheduled = False
        if self._job_retry and self._retry_handler._should_retry(result):
            rescheduled = self._retry_result(result, record, waiter)
        # TODO (crbug.com/751428): If the suite times out before a retry could
        # finish, we would lose the chance to report errors from the original
        # job.
        if self._has_retry(result) and rescheduled:
            return

        if self._should_report(result):
            self._result_reporter.report(result)

    def _record_result(self, result, record):
        """
        Record a test job result.

        @param result: Status instance for job.
        @param record: callable that records job status.
                 prototype:
                   record(base_job.status_log_entry)
        """
        result.record_all(record)
        self._remember_job_keyval(result)

    def _retry_result(self, result, record, waiter):
        """
        Retry a test job result.

        @param result: Status instance for job.
        @param record: callable that records job status.
                 prototype:
                   record(base_job.status_log_entry)
        @param waiter: JobResultsWaiter instance.
        @returns: True if a job was scheduled for retry, False otherwise.
        """
        test = self._jobs_to_tests[result.id]
        try:
            # It only takes effect for CQ retriable job:
            #   1) in first try, test.fast=True.
            #   2) in second try, test will be run in normal mode, so reset
            #       test.fast=False.
            test.fast = False
            new_job = self._schedule_test(
                    record=record, test=test, retry_for=result.id)
        except (error.RPCException, proxy.JSONRPCException) as e:
            logging.error('Failed to schedule test: %s, Reason: %s',
                          test.name, e)
            return False
        else:
            # _schedule_test() returns None when the retry ended up as
            # TEST_NA; there is no job for the waiter to track in that case.
            if new_job is not None:
                waiter.add_job(new_job)
            return bool(new_job)

    @property
    def jobs(self):
        """Give a copy of the associated jobs

        @returns: array of jobs"""
        return list(self._jobs)

    @property
    def _should_file_bugs(self):
        """Return whether bugs should be filed.

        @returns: bool
        """
        # File bug when failure is one of the _FILE_BUG_SUITES,
        # otherwise send an email to the owner and cc.
        return self._tag in _FILE_BUG_SUITES

    def abort(self):
        """
        Abort all scheduled test jobs.

        Issues no RPC when nothing has been scheduled.
        """
        if self._jobs:
            job_ids = [job.id for job in self._jobs]
            self._afe.run('abort_host_queue_entries', job__id__in=job_ids)

    def _remember_job_keyval(self, job):
        """
        Record provided job as a suite job keyval, for later referencing.

        Nothing is written unless a results directory is set and the job has
        truthy id, owner and test_name attributes.

        @param job: some representation of a job that has the attributes:
                    id, test_name, and owner
        """
        if self._results_dir and job.id and job.owner and job.test_name:
            job_id_owner = '%s-%s' % (job.id, job.owner)
            logging.debug('Adding job keyval for %s=%s',
                          job.test_name, job_id_owner)
            # hashlib only accepts bytes; a text test name (the normal case
            # on Python 3) has to be encoded before it can be hashed.
            name_bytes = job.test_name
            if not isinstance(name_bytes, bytes):
                name_bytes = name_bytes.encode('utf-8')
            utils.write_keyval(
                self._results_dir,
                {hashlib.md5(name_bytes).hexdigest(): job_id_owner})
|
|
|
|
|
|
class Suite(_BaseSuite):
    """
    A suite of tests, defined by some predicate over control file variables.

    The constructor looks up control files through a control file getter,
    keeps the tests accepted by every predicate, and hands them to
    _BaseSuite, which can fire off jobs to run them and wait for results.
    """

    # TODO(ayatane): These methods are kept on the Suite class for
    # backward compatibility.
    find_and_parse_tests = _deprecated_suite_method(find_and_parse_tests)
    find_possible_tests = _deprecated_suite_method(find_possible_tests)
    create_fs_getter = _deprecated_suite_method(create_fs_getter)
    name_in_tag_predicate = _deprecated_suite_method(
            suite_common.name_in_tag_predicate)
    name_in_tag_similarity_predicate = _deprecated_suite_method(
            name_in_tag_similarity_predicate)
    test_name_equals_predicate = _deprecated_suite_method(
            test_name_equals_predicate)
    test_name_in_list_predicate = _deprecated_suite_method(
            suite_common.test_name_in_list_predicate)
    test_name_matches_pattern_predicate = _deprecated_suite_method(
            test_name_matches_pattern_predicate)
    test_file_matches_pattern_predicate = _deprecated_suite_method(
            test_file_matches_pattern_predicate)
    matches_attribute_expression_predicate = _deprecated_suite_method(
            matches_attribute_expression_predicate)
    test_name_similarity_predicate = _deprecated_suite_method(
            test_name_similarity_predicate)
    test_file_similarity_predicate = _deprecated_suite_method(
            test_file_similarity_predicate)
    list_all_suites = _deprecated_suite_method(list_all_suites)
    get_test_source_build = _deprecated_suite_method(
            suite_common.get_test_source_build)

    @classmethod
    def create_from_predicates(cls, predicates, builds, board, devserver,
                               cf_getter=None, name='ad_hoc_suite',
                               run_prod_code=False, **dargs):
        """
        Create a Suite using a given predicate test filters.

        When no control file getter is supplied, one is built here: a file
        system getter over _AUTOTEST_DIR if |run_prod_code| is set, otherwise
        a devserver getter for the test source build.

        @param predicates: A list of callables that accept ControlData
                           representations of control files. A test will be
                           included in suite if all callables in this list
                           return True on the given control file.
        @param builds: the builds on which we're running this suite. It's a
                       dictionary of version_prefix:build.
        @param board: the board on which we're running this suite.
        @param devserver: the devserver which contains the build.
        @param cf_getter: control_file_getter.ControlFileGetter. Defaults to
                          using DevServerGetter.
        @param name: name of suite. Defaults to 'ad_hoc_suite'
        @param run_prod_code: If true, the suite will run the tests that
                              lives in prod aka the test code currently on the
                              lab servers.
        @param **dargs: Any other Suite constructor parameters, as described
                        in Suite.__init__ docstring.
        @return a Suite instance.
        """
        if cf_getter is None and run_prod_code:
            cf_getter = create_fs_getter(_AUTOTEST_DIR)
        elif cf_getter is None:
            source_build = suite_common.get_test_source_build(builds, **dargs)
            cf_getter = _create_ds_getter(source_build, devserver)

        return cls(predicates,
                   name, builds, board, cf_getter, run_prod_code, **dargs)

    @classmethod
    def create_from_name(cls, name, builds, board, devserver, cf_getter=None,
                         **dargs):
        """
        Create a Suite using a predicate based on the SUITE control file var.

        Makes a name-in-tag predicate from |name| and uses it as the only
        predicate of the new Suite. When no control file getter is supplied,
        a devserver getter for the test source build is created.

        @param name: a value of the SUITE control file variable to search for.
        @param builds: the builds on which we're running this suite. It's a
                       dictionary of version_prefix:build.
        @param board: the board on which we're running this suite.
        @param devserver: the devserver which contains the build.
        @param cf_getter: control_file_getter.ControlFileGetter. Defaults to
                          using DevServerGetter.
        @param **dargs: Any other Suite constructor parameters, as described
                        in Suite.__init__ docstring.
        @return a Suite instance.
        """
        if cf_getter is None:
            source_build = suite_common.get_test_source_build(builds, **dargs)
            cf_getter = _create_ds_getter(source_build, devserver)

        suite_predicates = [suite_common.name_in_tag_predicate(name)]
        return cls(suite_predicates, name, builds, board, cf_getter, **dargs)

    def __init__(
            self,
            predicates,
            tag,
            builds,
            board,
            cf_getter,
            run_prod_code=False,
            afe=None,
            tko=None,
            pool=None,
            results_dir=None,
            max_runtime_mins=24*60,
            timeout_mins=24*60,
            file_bugs=False,
            suite_job_id=None,
            ignore_deps=False,
            extra_deps=None,
            priority=priorities.Priority.DEFAULT,
            forgiving_parser=True,
            wait_for_results=True,
            job_retry=False,
            max_retries=sys.maxsize,
            offload_failures_only=False,
            test_source_build=None,
            job_keyvals=None,
            test_args=None,
            child_dependencies=(),
            result_reporter=None,
    ):
        """
        Constructor

        Finds and parses the tests matching all |predicates|, then passes
        them, together with the remaining arguments, to _BaseSuite.

        @param predicates: A list of callables that accept ControlData
                           representations of control files. A test will be
                           included in suite if all callables in this list
                           return True on the given control file.
        @param tag: a string with which to tag jobs run in this suite.
        @param builds: the builds on which we're running this suite.
        @param board: the board on which we're running this suite.
        @param cf_getter: a control_file_getter.ControlFileGetter
        @param run_prod_code: If true, the suite will run the test code that
                              lives in prod aka the test code currently on the
                              lab servers.
        @param afe: an instance of AFE as defined in server/frontend.py.
        @param tko: an instance of TKO as defined in server/frontend.py.
        @param pool: Specify the pool of machines to use for scheduling
                purposes.
        @param results_dir: The directory where the job can write results to.
                            This must be set if you want job_id of sub-jobs
                            list in the job keyvals.
        @param max_runtime_mins: Maximum suite runtime, in minutes.
        @param timeout_mins: Maximum job lifetime, in minutes.
        @param file_bugs: Passed through to the _BaseSuite constructor.
        @param suite_job_id: Job id that will act as parent id to all sub jobs.
                             Default: None
        @param ignore_deps: True if jobs should ignore the DEPENDENCIES
                            attribute and skip applying of dependency labels.
                            (Default:False)
        @param extra_deps: A list of strings which are the extra DEPENDENCIES
                           to add to each test being scheduled.
        @param priority: Integer priority level.  Higher is more important.
        @param forgiving_parser: Passed through to find_and_parse_tests.
        @param wait_for_results: Set to False to run the suite job without
                                 waiting for test jobs to finish. Default is
                                 True.
        @param job_retry: A bool value indicating whether jobs should be retried
                          on failure. If True, the field 'JOB_RETRIES' in
                          control files will be respected. If False, do not
                          retry.
        @param max_retries: Maximum retry limit at suite level.
                            Regardless how many times each individual test
                            has been retried, the total number of retries
                            happening in the suite can't exceed _max_retries.
                            Default to sys.maxsize.
        @param offload_failures_only: Only enable gs_offloading for failed
                                      jobs.
        @param test_source_build: Build that contains the server-side test code.
        @param job_keyvals: General job keyvals to be inserted into keyval file,
                            which will be used by tko/parse later.
        @param test_args: A dict of args passed all the way to each individual
                          test that will be actually ran.
        @param child_dependencies: (optional) list of dependency strings
                to be added as dependencies to child jobs.
        @param result_reporter: A _ResultReporter instance to report results. If
                None, an _EmailReporter will be created.
        """
        matching_tests = find_and_parse_tests(
                cf_getter,
                _ComposedPredicate(predicates),
                tag,
                forgiving_parser=forgiving_parser,
                run_prod_code=run_prod_code,
                test_args=test_args,
        )
        super(Suite, self).__init__(
                tests=matching_tests,
                tag=tag,
                builds=builds,
                board=board,
                afe=afe,
                tko=tko,
                pool=pool,
                results_dir=results_dir,
                max_runtime_mins=max_runtime_mins,
                timeout_mins=timeout_mins,
                file_bugs=file_bugs,
                suite_job_id=suite_job_id,
                ignore_deps=ignore_deps,
                extra_deps=extra_deps,
                priority=priority,
                wait_for_results=wait_for_results,
                job_retry=job_retry,
                max_retries=max_retries,
                offload_failures_only=offload_failures_only,
                test_source_build=test_source_build,
                job_keyvals=job_keyvals,
                child_dependencies=child_dependencies,
                result_reporter=result_reporter,
        )
|
|
|
|
|
|
class ProvisionSuite(_BaseSuite):
    """
    A suite for provisioning DUTs.

    This is done by creating dummy_Pass tests, one per available host (up to
    a cap), and waiting until enough of them have passed.
    """

    def __init__(
            self,
            tag,
            builds,
            board,
            devserver,
            num_required,
            num_max=float('inf'),
            cf_getter=None,
            run_prod_code=False,
            test_args=None,
            test_source_build=None,
            **kwargs):
        """
        Constructor

        @param tag: a string with which to tag jobs run in this suite.
        @param builds: the builds on which we're running this suite.
        @param board: the board on which we're running this suite.
        @param devserver: the devserver which contains the build.
        @param num_required: number of tests that must pass.  This is
                             capped by the number of tests that are run.
        @param num_max: max number of tests to make.  By default there
                        is no cap, a test is created for each eligible host.
        @param cf_getter: a control_file_getter.ControlFileGetter.
        @param run_prod_code: Passed through to _load_dummy_test.
        @param test_args: A dict of args passed all the way to each individual
                          test that will be actually ran.
        @param test_source_build: Build that contains the server-side test code.
        @param kwargs: Various keyword arguments passed to
                       _BaseSuite constructor.
        """
        super(ProvisionSuite, self).__init__(
                tests=[],
                tag=tag,
                builds=builds,
                board=board,
                **kwargs)
        self._num_successful = 0
        self._num_required = 0
        self.tests = []

        # Keep only the dependencies that provisioning does not act on.
        static_deps = []
        for dep in self._dependencies:
            if not provision.Provision.acts_on(dep):
                static_deps.append(dep)
        if 'pool:suites' in static_deps:
            logging.info('Provision suite is disabled on suites pool')
            return

        logging.debug('Looking for hosts matching %r', static_deps)
        matching_hosts = self._afe.get_hosts(
                invalid=False, multiple_labels=static_deps)
        logging.debug('Found %d matching hosts for ProvisionSuite',
                      len(matching_hosts))
        ready_hosts = [host for host in matching_hosts
                       if host.is_available()]
        logging.debug('Found %d available hosts for ProvisionSuite',
                      len(ready_hosts))

        dummy_test = _load_dummy_test(
                builds, devserver, cf_getter,
                run_prod_code, test_args, test_source_build)
        # The same test object is repeated once per host to provision.
        test_count = min(len(ready_hosts), num_max)
        self.tests = [dummy_test] * test_count
        logging.debug('Made %d tests for ProvisionSuite', len(self.tests))

        self._num_required = min(num_required, len(self.tests))
        logging.debug('Expecting %d tests to pass for ProvisionSuite',
                      self._num_required)

    def _handle_result(self, result, record, waiter):
        """Handle a result as _BaseSuite does, then count it if it is good.

        @param result: Status instance for job.
        @param record: callable that records job status.
        @param waiter: JobResultsWaiter instance.
        """
        super(ProvisionSuite, self)._handle_result(result, record, waiter)
        if result.is_good():
            self._num_successful += 1

    def _finished_waiting(self):
        """Return True once enough tests have passed."""
        return self._num_successful >= self._num_required
|
|
|
|
|
|
def _load_dummy_test(
        builds,
        devserver,
        cf_getter=None,
        run_prod_code=False,
        test_args=None,
        test_source_build=None):
    """
    Load and return the dummy pass test.

    When no control file getter is given, one is created: a file system
    getter over _AUTOTEST_DIR if |run_prod_code| is set, otherwise a
    devserver getter for the test source build, whose control files are
    staged on the devserver first.

    @param builds: the builds on which we're running this suite.
    @param devserver: the devserver which contains the build.
    @param cf_getter: a control_file_getter.ControlFileGetter.
    @param run_prod_code: If true, read control files from _AUTOTEST_DIR
                          instead of the devserver; also passed on to the
                          control file retriever.
    @param test_args: A dict of args passed all the way to each individual
                      test that will be actually ran.
    @param test_source_build: Build that contains the server-side test code.
    @returns: whatever the retriever returns for the 'dummy_Pass' test.
    """
    if cf_getter is None and run_prod_code:
        cf_getter = create_fs_getter(_AUTOTEST_DIR)
    elif cf_getter is None:
        source_build = suite_common.get_test_source_build(
                builds, test_source_build=test_source_build)
        devserver.stage_artifacts(image=source_build,
                                  artifacts=['control_files'])
        cf_getter = _create_ds_getter(source_build, devserver)

    return _ControlFileRetriever(
            cf_getter,
            run_prod_code=run_prod_code,
            test_args=test_args).retrieve_for_test('dummy_Pass')
|
|
|
|
|
|
class _ComposedPredicate(object):
|
|
"""Return the composition of the predicates.
|
|
|
|
Predicates are functions that take a test control data object and
|
|
return True of that test is to be included. The returned
|
|
predicate's set is the intersection of all of the input predicates'
|
|
sets (it returns True if all predicates return True).
|
|
"""
|
|
|
|
def __init__(self, predicates):
|
|
"""Initialize instance.
|
|
|
|
@param predicates: Iterable of predicates.
|
|
"""
|
|
self._predicates = list(predicates)
|
|
|
|
def __repr__(self):
|
|
return '{cls}({this._predicates!r})'.format(
|
|
cls=type(self).__name__,
|
|
this=self,
|
|
)
|
|
|
|
def __call__(self, control_data_):
|
|
return all(f(control_data_) for f in self._predicates)
|
|
|
|
|
|
def _is_nonexistent_board_error(e):
|
|
"""Return True if error is caused by nonexistent board label.
|
|
|
|
As of this writing, the particular case we want looks like this:
|
|
|
|
1) e.problem_keys is a dictionary
|
|
2) e.problem_keys['meta_hosts'] exists as the only key
|
|
in the dictionary.
|
|
3) e.problem_keys['meta_hosts'] matches this pattern:
|
|
"Label "board:.*" not found"
|
|
|
|
We check for conditions 1) and 2) on the
|
|
theory that they're relatively immutable.
|
|
We don't check condition 3) because it seems
|
|
likely to be a maintenance burden, and for the
|
|
times when we're wrong, being right shouldn't
|
|
matter enough (we _hope_).
|
|
|
|
@param e: proxy.ValidationError instance
|
|
@returns: boolean
|
|
"""
|
|
return (isinstance(e.problem_keys, dict)
|
|
and len(e.problem_keys) == 1
|
|
and 'meta_hosts' in e.problem_keys)
|
|
|
|
|
|
class _ResultReporter(six.with_metaclass(abc.ABCMeta, object)):
    """Abstract interface for objects that report test results.

    Usually, this is used to report test failures.  Concrete reporters must
    implement report().
    """

    @abc.abstractmethod
    def report(self, result):
        """Report a single test result.

        @param result: Status instance for job.
        """
|
|
|
|
|
|
class _EmailReporter(_ResultReporter):
    """Class that emails based on test failures.

    The reporting modules depend on external packages, e.g., httplib2.
    Importing them at module level would force every importer of this
    module to build site-packages first, so each method imports the
    reporting module it needs locally.
    """

    def __init__(self, suite, bug_template=None):
        """Initialize instance.

        @param suite: The suite whose results are reported; its _tko,
                      _job_creator, _tag and _jobs_to_tests are read later.
        @param bug_template: A template dictionary specifying the default bug
                             filing options for failures in this suite. A
                             falsy value is replaced by an empty dict.
        """
        self._suite = suite
        self._bug_template = bug_template if bug_template else {}

    def _get_test_bug(self, result):
        """Get TestBug for the given result.

        @param result: Status instance for a test job.
        @returns: TestBug instance.
        """
        # Imported here on purpose; see the class docstring.
        from autotest_lib.server.cros.dynamic_suite import reporting

        suite = self._suite
        job_views = suite._tko.run('get_detailed_test_views',
                                   afe_job_id=result.id)
        cros_build = suite._job_creator.cros_build
        chrome_version = utils.get_chrome_version(job_views)
        return reporting.TestBug(cros_build, chrome_version, suite._tag,
                                 result)

    def _get_bug_template(self, result):
        """Get BugTemplate for test job.

        Tries to merge the suite level bug template with the one defined in
        the test's control file.

        @param result: Status instance for job.
        @returns: the merged bug template; the suite level template when an
                  AttributeError is raised during the merge (e.g. the test
                  has no bug_template); an empty dict when the merge fails
                  with InvalidBugTemplateException.
        """
        # Imported here on purpose; see the class docstring.
        from autotest_lib.server.cros.dynamic_suite import reporting_utils

        suite_template = reporting_utils.BugTemplate(self._bug_template)
        try:
            control_data = self._suite._jobs_to_tests[result.id]
            merged = suite_template.finalize_bug_template(
                    control_data.bug_template)
            return merged
        except AttributeError:
            # Test control file does not have bug template defined.
            return suite_template.bug_template
        except reporting_utils.InvalidBugTemplateException as e:
            logging.error('Merging bug templates failed with '
                          'error: %s An empty bug template will '
                          'be used.', e)
            return {}

    def report(self, result):
        """Send an email for the given result.

        @param result: Status instance for job.
        """
        # Imported here on purpose; see the class docstring.
        from autotest_lib.server.cros.dynamic_suite import reporting

        test_bug = self._get_test_bug(result)
        bug_template = self._get_bug_template(result)
        reporting.send_email(test_bug, bug_template)
|