#!/usr/bin/env python
#
# Copyright (C) 2017 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Outputs quantitative information about Address Sanitizer traces."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from collections import Counter
from datetime import datetime
import argparse
import bisect
import os
import re


def find_match(list_substrings, big_string):
    """Returns the category a trace belongs to by searching substrings."""
    for ind, substr in enumerate(list_substrings):
        if big_string.find(substr) != -1:
            return ind
    return list_substrings.index("Uncategorized")
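

# Illustration of how find_match() behaves, using made-up inputs rather than
# real trace text (the substring search is case-sensitive):
#
#   find_match(["Uncategorized", "Interpreter"], "#00 pc ... Interpreter ...")
#   # -> 1
#   find_match(["Uncategorized", "Interpreter"], "#00 pc ... DoSomething ...")
#   # -> 0 (falls back to the index of "Uncategorized")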


def absolute_to_relative(data_lists, symbol_traces):
    """Converts addresses to Dex file offsets and shifts times to 0 in ms."""

    offsets = data_lists["offsets"]
    time_offsets = data_lists["times"]

    # Format of time provided by logcat
    time_format_str = "%H:%M:%S.%f"
    first_access_time = datetime.strptime(data_lists["plot_list"][0][0],
                                          time_format_str)
    for ind, elem in enumerate(data_lists["plot_list"]):
        elem_date_time = datetime.strptime(elem[0], time_format_str)
        # Shift time values so that the first access is at time 0 milliseconds
        elem[0] = int((elem_date_time - first_access_time).total_seconds() *
                      1000)
        address_access = int(elem[1], 16)
        # For each poisoned address, find the highest Dex file starting
        # address less than address_access
        dex_start_list, dex_size_list = zip(*data_lists["dex_ends_list"])
        dex_file_ind = bisect.bisect(dex_start_list, address_access) - 1
        dex_offset = address_access - dex_start_list[dex_file_ind]
        # Assumes that offsets is already sorted and constrains the offset to
        # be within range of the dex file
        max_offset = min(offsets[1], dex_size_list[dex_file_ind])
        # Nullifies data that does not meet the offset criteria, if specified
        if (dex_offset >= offsets[0] and dex_offset < max_offset and
                elem[0] >= time_offsets[0] and elem[0] < time_offsets[1]):
            elem.insert(1, dex_offset)
            # Category that a data point belongs to
            elem.insert(2, data_lists["cat_list"][ind])
        else:
            elem[:] = 4 * [None]
            symbol_traces[ind] = None
            data_lists["cat_list"][ind] = None
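

# Worked example of the translation above, with made-up numbers: if
# data_lists["dex_ends_list"] is [(0x7000000, 0x4000), (0x7200000, 0x8000)]
# and a poisoned access hits address 0x7200123, bisect picks the second dex
# file and dex_offset becomes 0x7200123 - 0x7200000 = 0x123. The data point
# is kept only if 0x123 lies inside the requested --offsets window (and below
# the dex file size 0x8000) and its shifted time lies inside the --times
# window; otherwise the whole point is nulled out.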


def print_category_info(cat_split, outname, out_dir_name, title):
    """Prints category information and writes the related traces to a file."""
    trace_counts_dict = Counter(cat_split)
    trace_counts_list_ordered = trace_counts_dict.most_common()
    print(53 * "-")
    print(title)
    print("\tNumber of distinct traces: " +
          str(len(trace_counts_list_ordered)))
    print("\tSum of trace counts: " +
          str(sum([trace[1] for trace in trace_counts_list_ordered])))
    print("\n\tCount: How many traces appeared with count\n\t", end="")
    print(Counter([trace[1] for trace in trace_counts_list_ordered]))
    with open(os.path.join(out_dir_name, outname), "w") as output_file:
        for trace in trace_counts_list_ordered:
            output_file.write("\n\nNumber of times appeared: " +
                              str(trace[1]) +
                              "\n")
            output_file.write(trace[0].strip())
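

# Sketch of the console block printed by one call, with made-up counts
# (12 distinct traces; 7 appeared once, 3 appeared five times, 2 ten times):
#
#   -----------------------------------------------------
#   Traces containing: Interpreter
#       Number of distinct traces: 12
#       Sum of trace counts: 42
#
#       Count: How many traces appeared with count
#       Counter({1: 7, 5: 3, 10: 2})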


def print_categories(categories, symbol_file_split, out_dir_name):
    """Prints details of all categories."""
    symbol_file_split = [trace for trace in symbol_file_split
                         if trace is not None]
    # Info of traces containing a call to the current category
    for cat_num, cat_name in enumerate(categories[1:]):
        print("\nCategory #%d" % (cat_num + 1))
        cat_split = [trace for trace in symbol_file_split
                     if cat_name in trace]
        cat_file_name = cat_name.lower() + "cat_output"
        print_category_info(cat_split, cat_file_name, out_dir_name,
                            "Traces containing: " + cat_name)
        noncat_split = [trace for trace in symbol_file_split
                        if cat_name not in trace]
        print_category_info(noncat_split, "non" + cat_file_name,
                            out_dir_name,
                            "Traces not containing: " +
                            cat_name)

    # All traces (including uncategorized) together
    print_category_info(symbol_file_split, "allcat_output",
                        out_dir_name,
                        "All traces together:")
    # Traces containing none of the keywords
    # Only used if categories are passed in
    if len(categories) > 1:
        noncat_split = [trace for trace in symbol_file_split if
                        all(cat_name not in trace
                            for cat_name in categories)]
        print_category_info(noncat_split, "noncat_output",
                            out_dir_name,
                            "Uncategorized calls")
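

# For a run with the category keywords ["Verifier", "Interpreter"] (placeholder
# names), the function above writes its trace listings to files named
# "verifiercat_output", "nonverifiercat_output", "interpretercat_output",
# "noninterpretercat_output", "allcat_output" and "noncat_output" in the
# output directory.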


def is_directory(path_name):
    """Checks if a path is an actual directory."""
    if not os.path.isdir(path_name):
        dir_error = "%s is not a directory" % (path_name)
        raise argparse.ArgumentTypeError(dir_error)
    return path_name


def parse_args(argv):
    """Parses arguments passed in."""
    parser = argparse.ArgumentParser()
    parser.add_argument("-d", action="store",
                        default=".", dest="out_dir_name", type=is_directory,
                        help="Output Directory")
    parser.add_argument("--dex-file", action="store",
                        default=None, dest="dex_file",
                        type=argparse.FileType("r"),
                        help="Baksmali Dex File Dump")
    parser.add_argument("--offsets", action="store", nargs=2,
                        default=[float(0), float("inf")],
                        dest="offsets",
                        metavar="OFFSET",
                        type=float,
                        help="Filters out accesses whose dex offsets are not"
                             " between the two given offsets. 'inf' can be"
                             " given for infinity")
    parser.add_argument("--times", action="store", nargs=2,
                        default=[float(0), float("inf")],
                        dest="times",
                        metavar="TIME",
                        type=float,
                        help="Filters out accesses whose time offsets are not"
                             " between the two given times. 'inf' can be"
                             " given for infinity")
    parser.add_argument("sanitizer_trace", action="store",
                        type=argparse.FileType("r"),
                        help="File containing sanitizer traces filtered by "
                             "prune_sanitizer_output.py")
    parser.add_argument("symbol_trace", action="store",
                        type=argparse.FileType("r"),
                        help="File containing symbolized traces that match "
                             "sanitizer_trace")
    parser.add_argument("dex_starts", action="store",
                        type=argparse.FileType("r"),
                        help="File containing starting addresses of Dex Files")
    parser.add_argument("categories", action="store", nargs="*",
                        help="Keywords expected to appear in a large number"
                             " of symbolized traces")

    return parser.parse_args(argv)
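

# Example invocation; the file names below are placeholders rather than the
# output of any real run, and <this_script> stands for whatever this file is
# saved as:
#
#   python <this_script>.py -d out_dir --offsets 0 inf \
#       asan_traces.txt symbol_traces.txt dex_start_log.txt Interpreter
#
# asan_traces.txt is the pruned sanitizer trace (prune_sanitizer_output.py),
# symbol_traces.txt holds the matching symbolized traces, dex_start_log.txt
# contains the RegisterDexFile lines, and "Interpreter" is an optional
# category keyword.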


def get_dex_offset_data(line, dex_file_item):
    """Returns a tuple of the dex file offset and (item name, line data)."""
    return (int(line[:line.find(":")], 16),
            (dex_file_item,
             line.split("|")[1].strip())
            )
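

# Sketch of the value returned above, using a made-up dump line and item name
# (the exact baksmali dump format may differ in detail):
#
#   get_dex_offset_data("000f32: 1a00 2100 |0000: const-string v0, ...",
#                       "string_data_item")
#   # -> (0xf32, ("string_data_item", "0000: const-string v0, ..."))
#
# i.e. the hex offset before the first ':' paired with the item name and the
# text after the '|'.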


def read_data(parsed_argv):
    """Reads data from the filepath arguments and parses them into lists."""
    # Uses a dictionary to establish a relation between the lists added
    data_lists = {}
    categories = parsed_argv.categories
    # Makes sure each trace maps to some category
    categories.insert(0, "Uncategorized")

    data_lists["offsets"] = parsed_argv.offsets
    data_lists["offsets"].sort()

    data_lists["times"] = parsed_argv.times
    data_lists["times"].sort()

    logcat_file_data = parsed_argv.sanitizer_trace.readlines()
    parsed_argv.sanitizer_trace.close()

    symbol_file_split = parsed_argv.symbol_trace.read().split("Stack Trace")
    # Removes text before the first trace
    symbol_file_split = symbol_file_split[1:]
    parsed_argv.symbol_trace.close()

    dex_start_file_data = parsed_argv.dex_starts.readlines()
    parsed_argv.dex_starts.close()

    if parsed_argv.dex_file is not None:
        dex_file_data = parsed_argv.dex_file.read()
        parsed_argv.dex_file.close()
        # Splits the baksmali dump by item
        item_split = [s.splitlines() for s in re.split(r"\|\[[0-9]+\] ",
                                                       dex_file_data)]
        # Splits each item by line and creates a list of offsets and a
        # corresponding list of the data associated with each line
        offset_list, offset_data = zip(*[get_dex_offset_data(line, item[0])
                                         for item in item_split
                                         for line in item[1:]
                                         if re.search("[0-9a-f]{6}:", line)
                                         is not None and
                                         line.find("|") != -1])
        data_lists["offset_list"] = offset_list
        data_lists["offset_data"] = offset_data
    else:
        dex_file_data = None

    # Each element is a list of the time and the address accessed
    data_lists["plot_list"] = [[elem[1] for elem in enumerate(line.split())
                                if elem[0] in (1, 11)
                                ]
                               for line in logcat_file_data
                               if "use-after-poison" in line or
                               "unknown-crash" in line
                               ]
    # Maps each trace to the category it belongs to, based on the arguments
    data_lists["cat_list"] = [categories[find_match(categories, trace)]
                              for trace in symbol_file_split]

    # Contains the starting address and size of every dex file, used to
    # calculate dex offsets
    data_lists["dex_ends_list"] = [(int(line.split()[9], 16),
                                    int(line.split()[12])
                                    )
                                   for line in dex_start_file_data
                                   if "RegisterDexFile" in line
                                   ]
    # Dex file starting addresses must be sorted because bisect requires
    # sorted lists.
    data_lists["dex_ends_list"].sort()

    return data_lists, categories, symbol_file_split
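

# Rough shape of the dictionary returned by read_data(); the keys come from
# the code above, the values shown here are placeholders:
#
#   {
#       "offsets":       [0.0, float("inf")],   # sorted --offsets bounds
#       "times":         [0.0, float("inf")],   # sorted --times bounds
#       "plot_list":     [["18:25:43.123", "0x7f84c3f2"], ...],
#       "cat_list":      ["Uncategorized", "Interpreter", ...],
#       "dex_ends_list": [(dex_start_address, dex_size), ...],
#       # "offset_list" and "offset_data" are present only when --dex-file
#       # is supplied
#   }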


def main():
    """Takes in trace information and outputs details about it."""
    parsed_argv = parse_args(None)
    data_lists, categories, symbol_file_split = read_data(parsed_argv)

    # Formats plot_list such that each element is a data point
    absolute_to_relative(data_lists, symbol_file_split)
    for file_ext, cat_name in enumerate(categories):
        out_file_name = os.path.join(parsed_argv.out_dir_name,
                                     "time_output_" + str(file_ext) + ".dat")
        with open(out_file_name, "w") as output_file:
            output_file.write("# Category: " + cat_name + "\n")
            output_file.write("# Time, Dex File Offset_10, Dex File Offset_16,"
                              " Address, Item Accessed, Item Member Accessed,"
                              " Unaligned\n")
            for time, dex_offset, category, address in data_lists["plot_list"]:
                if category == cat_name:
                    output_file.write(
                        str(time) +
                        " " +
                        str(dex_offset) +
                        " #" +
                        hex(dex_offset) +
                        " " +
                        str(address))
                    if "offset_list" in data_lists:
                        dex_offset_index = bisect.bisect(
                            data_lists["offset_list"],
                            dex_offset) - 1
                        aligned_dex_offset = (data_lists["offset_list"]
                                              [dex_offset_index])
                        dex_offset_data = (data_lists["offset_data"]
                                           [dex_offset_index])
                        output_file.write(
                            " " +
                            "|".join(dex_offset_data) +
                            " " +
                            str(aligned_dex_offset != dex_offset))
                    output_file.write("\n")
    print_categories(categories, symbol_file_split, parsed_argv.out_dir_name)
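

# Sketch of one line of a "time_output_<N>.dat" file produced above, with
# made-up values: time in ms, the dex offset in decimal and (after '#') in
# hex, the raw address, then, when --dex-file was given, the matched dump
# item and whether the access was unaligned:
#
#   1500 291 #0x123 0x7f84c3f2 string_data_item|0000: const-string v0, ... True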


if __name__ == "__main__":
    main()