You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

498 lines
19 KiB

# DExTer : Debugging Experience Tester
# ~~~~~~ ~ ~~ ~ ~~
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
"""Calculate a 'score' based on some dextIR.
Assign penalties based on different commands to decrease the score.
1.000 would be a perfect score.
0.000 is the worst theoretical score possible.
"""
from collections import defaultdict, namedtuple, Counter
import difflib
import os
from itertools import groupby
from dex.command.StepValueInfo import StepValueInfo
PenaltyCommand = namedtuple('PenaltyCommand', ['pen_dict', 'max_penalty'])
# 'meta' field used in different ways by different things
PenaltyInstance = namedtuple('PenaltyInstance', ['meta', 'the_penalty'])
def add_heuristic_tool_arguments(parser):
parser.add_argument(
'--penalty-variable-optimized',
type=int,
default=3,
help='set the penalty multiplier for each'
' occurrence of a variable that was optimized'
' away',
metavar='<int>')
parser.add_argument(
'--penalty-misordered-values',
type=int,
default=3,
help='set the penalty multiplier for each'
' occurrence of a misordered value.',
metavar='<int>')
parser.add_argument(
'--penalty-irretrievable',
type=int,
default=4,
help='set the penalty multiplier for each'
" occurrence of a variable that couldn't"
' be retrieved',
metavar='<int>')
parser.add_argument(
'--penalty-not-evaluatable',
type=int,
default=5,
help='set the penalty multiplier for each'
" occurrence of a variable that couldn't"
' be evaluated',
metavar='<int>')
parser.add_argument(
'--penalty-missing-values',
type=int,
default=6,
help='set the penalty multiplier for each missing'
' value',
metavar='<int>')
parser.add_argument(
'--penalty-incorrect-values',
type=int,
default=7,
help='set the penalty multiplier for each'
' occurrence of an unexpected value.',
metavar='<int>')
parser.add_argument(
'--penalty-unreachable',
type=int,
default=4, # XXX XXX XXX selected by random
help='set the penalty for each line stepped onto that should'
' have been unreachable.',
metavar='<int>')
parser.add_argument(
'--penalty-misordered-steps',
type=int,
default=2, # XXX XXX XXX selected by random
help='set the penalty for differences in the order of steps'
' the program was expected to observe.',
metavar='<int>')
parser.add_argument(
'--penalty-missing-step',
type=int,
default=4, # XXX XXX XXX selected by random
help='set the penalty for the program skipping over a step.',
metavar='<int>')
parser.add_argument(
'--penalty-incorrect-program-state',
type=int,
default=4, # XXX XXX XXX selected by random
help='set the penalty for the program never entering an expected state'
' or entering an unexpected state.',
metavar='<int>')
class Heuristic(object):
def __init__(self, context, steps):
self.context = context
self.penalties = {}
worst_penalty = max([
self.penalty_variable_optimized, self.penalty_irretrievable,
self.penalty_not_evaluatable, self.penalty_incorrect_values,
self.penalty_missing_values, self.penalty_unreachable,
self.penalty_missing_step, self.penalty_misordered_steps
])
# Get DexExpectWatchType results.
try:
for command in steps.commands['DexExpectWatchType']:
command.eval(steps)
maximum_possible_penalty = min(3, len(
command.values)) * worst_penalty
name, p = self._calculate_expect_watch_penalties(
command, maximum_possible_penalty)
name = name + ' ExpectType'
self.penalties[name] = PenaltyCommand(p,
maximum_possible_penalty)
except KeyError:
pass
# Get DexExpectWatchValue results.
try:
for command in steps.commands['DexExpectWatchValue']:
command.eval(steps)
maximum_possible_penalty = min(3, len(
command.values)) * worst_penalty
name, p = self._calculate_expect_watch_penalties(
command, maximum_possible_penalty)
name = name + ' ExpectValue'
self.penalties[name] = PenaltyCommand(p,
maximum_possible_penalty)
except KeyError:
pass
try:
penalties = defaultdict(list)
maximum_possible_penalty_all = 0
for expect_state in steps.commands['DexExpectProgramState']:
success = expect_state.eval(steps)
p = 0 if success else self.penalty_incorrect_program_state
meta = 'expected {}: {}'.format(
'{} times'.format(expect_state.times)
if expect_state.times >= 0 else 'at least once',
expect_state.program_state_text)
if success:
meta = '<g>{}</>'.format(meta)
maximum_possible_penalty = self.penalty_incorrect_program_state
maximum_possible_penalty_all += maximum_possible_penalty
name = expect_state.program_state_text
penalties[meta] = [PenaltyInstance('{} times'.format(
len(expect_state.encounters)), p)]
self.penalties['expected program states'] = PenaltyCommand(
penalties, maximum_possible_penalty_all)
except KeyError:
pass
# Get the total number of each step kind.
step_kind_counts = defaultdict(int)
for step in getattr(steps, 'steps'):
step_kind_counts[step.step_kind] += 1
# Get DexExpectStepKind results.
penalties = defaultdict(list)
maximum_possible_penalty_all = 0
try:
for command in steps.commands['DexExpectStepKind']:
command.eval()
# Cap the penalty at 2 * expected count or else 1
maximum_possible_penalty = max(command.count * 2, 1)
p = abs(command.count - step_kind_counts[command.name])
actual_penalty = min(p, maximum_possible_penalty)
key = ('{}'.format(command.name)
if actual_penalty else '<g>{}</>'.format(command.name))
penalties[key] = [PenaltyInstance(p, actual_penalty)]
maximum_possible_penalty_all += maximum_possible_penalty
self.penalties['step kind differences'] = PenaltyCommand(
penalties, maximum_possible_penalty_all)
except KeyError:
pass
if 'DexUnreachable' in steps.commands:
cmds = steps.commands['DexUnreachable']
unreach_count = 0
# Find steps with unreachable in them
ureachs = [
s for s in steps.steps if 'DexUnreachable' in s.watches.keys()
]
# There's no need to match up cmds with the actual watches
upen = self.penalty_unreachable
count = upen * len(ureachs)
if count != 0:
d = dict()
for x in ureachs:
msg = 'line {} reached'.format(x.current_location.lineno)
d[msg] = [PenaltyInstance(upen, upen)]
else:
d = {
'<g>No unreachable lines seen</>': [PenaltyInstance(0, 0)]
}
total = PenaltyCommand(d, len(cmds) * upen)
self.penalties['unreachable lines'] = total
if 'DexExpectStepOrder' in steps.commands:
cmds = steps.commands['DexExpectStepOrder']
# Form a list of which line/cmd we _should_ have seen
cmd_num_lst = [(x, c.lineno) for c in cmds
for x in c.sequence]
# Order them by the sequence number
cmd_num_lst.sort(key=lambda t: t[0])
# Strip out sequence key
cmd_num_lst = [y for x, y in cmd_num_lst]
# Now do the same, but for the actually observed lines/cmds
ss = steps.steps
deso = [s for s in ss if 'DexExpectStepOrder' in s.watches.keys()]
deso = [s.watches['DexExpectStepOrder'] for s in deso]
# We rely on the steps remaining in order here
order_list = [int(x.expression) for x in deso]
# First off, check to see whether or not there are missing items
expected = Counter(cmd_num_lst)
seen = Counter(order_list)
unseen_line_dict = dict()
skipped_line_dict = dict()
mispen = self.penalty_missing_step
num_missing = 0
num_repeats = 0
for k, v in expected.items():
if k not in seen:
msg = 'Line {} not seen'.format(k)
unseen_line_dict[msg] = [PenaltyInstance(mispen, mispen)]
num_missing += v
elif v > seen[k]:
msg = 'Line {} skipped at least once'.format(k)
skipped_line_dict[msg] = [PenaltyInstance(mispen, mispen)]
num_missing += v - seen[k]
elif v < seen[k]:
# Don't penalise unexpected extra sightings of a line
# for now
num_repeats = seen[k] - v
pass
if len(unseen_line_dict) == 0:
pi = PenaltyInstance(0, 0)
unseen_line_dict['<g>All lines were seen</>'] = [pi]
if len(skipped_line_dict) == 0:
pi = PenaltyInstance(0, 0)
skipped_line_dict['<g>No lines were skipped</>'] = [pi]
total = PenaltyCommand(unseen_line_dict, len(expected) * mispen)
self.penalties['Unseen lines'] = total
total = PenaltyCommand(skipped_line_dict, len(expected) * mispen)
self.penalties['Skipped lines'] = total
ordpen = self.penalty_misordered_steps
cmd_num_lst = [str(x) for x in cmd_num_lst]
order_list = [str(x) for x in order_list]
lst = list(difflib.Differ().compare(cmd_num_lst, order_list))
diff_detail = Counter(l[0] for l in lst)
assert '?' not in diff_detail
# Diffs are hard to interpret; there are many algorithms for
# condensing them. Ignore all that, and just print out the changed
# sequences, it's up to the user to interpret what's going on.
def filt_lines(s, seg, e, key):
lst = [s]
for x in seg:
if x[0] == key:
lst.append(int(x[2:]))
lst.append(e)
return lst
diff_msgs = dict()
def reportdiff(start_idx, segment, end_idx):
msg = 'Order mismatch, expected linenos {}, saw {}'
expected_linenos = filt_lines(start_idx, segment, end_idx, '-')
seen_linenos = filt_lines(start_idx, segment, end_idx, '+')
msg = msg.format(expected_linenos, seen_linenos)
diff_msgs[msg] = [PenaltyInstance(ordpen, ordpen)]
# Group by changed segments.
start_expt_step = 0
end_expt_step = 0
to_print_lst = []
for k, subit in groupby(lst, lambda x: x[0] == ' '):
if k: # Whitespace group
nochanged = [x for x in subit]
end_expt_step = int(nochanged[0][2:])
if len(to_print_lst) > 0:
reportdiff(start_expt_step, to_print_lst,
end_expt_step)
start_expt_step = int(nochanged[-1][2:])
to_print_lst = []
else: # Diff group, save for printing
to_print_lst = [x for x in subit]
# If there was a dangling different step, print that too.
if len(to_print_lst) > 0:
reportdiff(start_expt_step, to_print_lst, '[End]')
if len(diff_msgs) == 0:
diff_msgs['<g>No lines misordered</>'] = [
PenaltyInstance(0, 0)
]
total = PenaltyCommand(diff_msgs, len(cmd_num_lst) * ordpen)
self.penalties['Misordered lines'] = total
return
def _calculate_expect_watch_penalties(self, c, maximum_possible_penalty):
penalties = defaultdict(list)
if c.line_range[0] == c.line_range[-1]:
line_range = str(c.line_range[0])
else:
line_range = '{}-{}'.format(c.line_range[0], c.line_range[-1])
name = '{}:{} [{}]'.format(
os.path.basename(c.path), line_range, c.expression)
num_actual_watches = len(c.expected_watches) + len(
c.unexpected_watches)
penalty_available = maximum_possible_penalty
# Only penalize for missing values if we have actually seen a watch
# that's returned us an actual value at some point, or if we've not
# encountered the value at all.
if num_actual_watches or c.times_encountered == 0:
for v in c.missing_values:
current_penalty = min(penalty_available,
self.penalty_missing_values)
penalty_available -= current_penalty
penalties['missing values'].append(
PenaltyInstance(v, current_penalty))
for v in c.encountered_values:
penalties['<g>expected encountered watches</>'].append(
PenaltyInstance(v, 0))
penalty_descriptions = [
(self.penalty_not_evaluatable, c.invalid_watches,
'could not evaluate'),
(self.penalty_variable_optimized, c.optimized_out_watches,
'result optimized away'),
(self.penalty_misordered_values, c.misordered_watches,
'misordered result'),
(self.penalty_irretrievable, c.irretrievable_watches,
'result could not be retrieved'),
(self.penalty_incorrect_values, c.unexpected_watches,
'unexpected result'),
]
for penalty_score, watches, description in penalty_descriptions:
# We only penalize the encountered issue for each missing value per
# command but we still want to record each one, so set the penalty
# to 0 after the threshold is passed.
times_to_penalize = len(c.missing_values)
for w in watches:
times_to_penalize -= 1
penalty_score = min(penalty_available, penalty_score)
penalty_available -= penalty_score
penalties[description].append(
PenaltyInstance(w, penalty_score))
if not times_to_penalize:
penalty_score = 0
return name, penalties
@property
def penalty(self):
result = 0
maximum_allowed_penalty = 0
for name, pen_cmd in self.penalties.items():
maximum_allowed_penalty += pen_cmd.max_penalty
value = pen_cmd.pen_dict
for category, inst_list in value.items():
result += sum(x.the_penalty for x in inst_list)
return min(result, maximum_allowed_penalty)
@property
def max_penalty(self):
return sum(p_cat.max_penalty for p_cat in self.penalties.values())
@property
def score(self):
try:
return 1.0 - (self.penalty / float(self.max_penalty))
except ZeroDivisionError:
return float('nan')
@property
def summary_string(self):
score = self.score
isnan = score != score # pylint: disable=comparison-with-itself
color = 'g'
if score < 0.25 or isnan:
color = 'r'
elif score < 0.75:
color = 'y'
return '<{}>({:.4f})</>'.format(color, score)
@property
def verbose_output(self): # noqa
string = ''
string += ('\n')
for command in sorted(self.penalties):
pen_cmd = self.penalties[command]
maximum_possible_penalty = pen_cmd.max_penalty
total_penalty = 0
lines = []
for category in sorted(pen_cmd.pen_dict):
lines.append(' <r>{}</>:\n'.format(category))
for result, penalty in pen_cmd.pen_dict[category]:
if isinstance(result, StepValueInfo):
text = 'step {}'.format(result.step_index)
if result.expected_value:
text += ' ({})'.format(result.expected_value)
else:
text = str(result)
if penalty:
assert penalty > 0, penalty
total_penalty += penalty
text += ' <r>[-{}]</>'.format(penalty)
lines.append(' {}\n'.format(text))
lines.append('\n')
string += (' <b>{}</> <y>[{}/{}]</>\n'.format(
command, total_penalty, maximum_possible_penalty))
for line in lines:
string += (line)
string += ('\n')
return string
@property
def penalty_variable_optimized(self):
return self.context.options.penalty_variable_optimized
@property
def penalty_irretrievable(self):
return self.context.options.penalty_irretrievable
@property
def penalty_not_evaluatable(self):
return self.context.options.penalty_not_evaluatable
@property
def penalty_incorrect_values(self):
return self.context.options.penalty_incorrect_values
@property
def penalty_missing_values(self):
return self.context.options.penalty_missing_values
@property
def penalty_misordered_values(self):
return self.context.options.penalty_misordered_values
@property
def penalty_unreachable(self):
return self.context.options.penalty_unreachable
@property
def penalty_missing_step(self):
return self.context.options.penalty_missing_step
@property
def penalty_misordered_steps(self):
return self.context.options.penalty_misordered_steps
@property
def penalty_incorrect_program_state(self):
return self.context.options.penalty_incorrect_program_state