#!/usr/bin/python2
# Copyright 2017 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""
This is a utility to build a summary of the given directory and save it to a
json file.

usage: utils.py [-h] [-p PATH] [-m MAX_SIZE_KB] [-d]

optional arguments:
  -p PATH         Path to build directory summary.
  -m MAX_SIZE_KB  Maximum result size in KB. Set to 0 to disable result
                  throttling.
  -d              Delete all result summary files in the given path.
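
For example, to build the summary of a (hypothetical) result directory and
throttle it to 512 KB:

  utils.py -p /usr/local/autotest/results/123-debug -m 512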

The content of the json file looks like the following, where `/D` holds a
directory's entries and `/S` is a size in bytes:

{'default': {'/D': [{'control': {'/S': 734}},
                    {'debug': {'/D': [{'client.0.DEBUG': {'/S': 5698}},
                                      {'client.0.ERROR': {'/S': 254}},
                                      {'client.0.INFO': {'/S': 1020}},
                                      {'client.0.WARNING': {'/S': 242}}],
                               '/S': 7214}}
                    ],
             '/S': 7948
            }
}
"""

from __future__ import division
from __future__ import print_function

import argparse
import copy
import fnmatch
import glob
import json
import logging
import os
import random
import sys
import time
import traceback

from six.moves import range

try:
    from autotest_lib.client.bin.result_tools import dedupe_file_throttler
    from autotest_lib.client.bin.result_tools import delete_file_throttler
    from autotest_lib.client.bin.result_tools import result_info
    from autotest_lib.client.bin.result_tools import shrink_file_throttler
    from autotest_lib.client.bin.result_tools import throttler_lib
    from autotest_lib.client.bin.result_tools import utils_lib
    from autotest_lib.client.bin.result_tools import zip_file_throttler
except ImportError:
    import dedupe_file_throttler
    import delete_file_throttler
    import result_info
    import shrink_file_throttler
    import throttler_lib
    import utils_lib
    import zip_file_throttler


# Do NOT import autotest_lib modules here (other than the guarded imports
# above). This module can be executed without dependency on other autotest
# modules. This is to keep the logic of result trimming on the server side,
# instead of depending on the autotest client module.

DEFAULT_SUMMARY_FILENAME_FMT = 'dir_summary_%d.json'
SUMMARY_FILE_PATTERN = 'dir_summary_*.json'
MERGED_SUMMARY_FILENAME = 'dir_summary_final.json'
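# For example, a summary file written at Unix time 1500000000 would be named
# 'dir_summary_1500000000.json' (see DEFAULT_SUMMARY_FILENAME_FMT above).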

# Minimum disk space that should remain available after saving the summary
# file.
MIN_FREE_DISK_BYTES = 10 * 1024 * 1024

# Autotest uses some state files to track process running state. The files are
# deleted from test results. Therefore, these files can be ignored.
FILES_TO_IGNORE = set([
    'control.autoserv.state'
])

# Smallest file size to shrink a file to.
MIN_FILE_SIZE_LIMIT_BYTE = 10 * 1024


def get_unique_dir_summary_file(path):
    """Get a unique file path to save the directory summary json string.

    @param path: The directory path to save the summary file to.
    """
    summary_file = DEFAULT_SUMMARY_FILENAME_FMT % time.time()
    # Make sure the summary file name is unique.
    file_name = os.path.join(path, summary_file)
    if os.path.exists(file_name):
        count = 1
        name, ext = os.path.splitext(summary_file)
        while os.path.exists(file_name):
            file_name = os.path.join(path, '%s_%s%s' % (name, count, ext))
            count += 1
    return file_name


def _preprocess_result_dir_path(path):
    """Verify the result directory path is valid and make sure it ends with `/`.

    @param path: A path to the result directory.
    @return: A verified and processed path to the result directory.
    @raise IOError: If the path doesn't exist.
    @raise ValueError: If the path is not a directory.
    """
    if not os.path.exists(path):
        raise IOError('Path %s does not exist.' % path)

    if not os.path.isdir(path):
        raise ValueError('The given path %s is a file. It must be a '
                         'directory.' % path)

    # Make sure the path ends with `/` so the root key of the summary json is
    # always utils_lib.ROOT_DIR ('').
    if not path.endswith(os.sep):
        path = path + os.sep

    return path


def _delete_missing_entries(summary_old, summary_new):
    """Delete files/directories that exist only in the old summary.

    When the new summary is final, i.e., it's built from the final result
    directory, files or directories missing from it are considered deleted and
    are trimmed to size 0.

    @param summary_old: Old directory summary.
    @param summary_new: New directory summary.
    """
    new_files = summary_new.get_file_names()
    old_files = summary_old.get_file_names()
    for name in old_files:
        old_file = summary_old.get_file(name)
        if name not in new_files:
            if old_file.is_dir:
                # Trim sub-directories.
                with old_file.disable_updating_parent_size_info():
                    _delete_missing_entries(old_file, result_info.EMPTY)
                old_file.update_sizes()
            elif name in FILES_TO_IGNORE:
                # Remove the file from the summary as it can be ignored.
                summary_old.remove_file(name)
            else:
                with old_file.disable_updating_parent_size_info():
                    # Before setting the trimmed size to 0, update the
                    # collected size if it's not set yet.
                    if not old_file.is_collected_size_recorded:
                        old_file.collected_size = old_file.trimmed_size
                    old_file.trimmed_size = 0
        elif old_file.is_dir:
            # If `name` is a directory in the old summary, but a file in the
            # new summary, delete the entry in the old summary.
            new_file = summary_new.get_file(name)
            if not new_file.is_dir:
                new_file = result_info.EMPTY
            _delete_missing_entries(old_file, new_file)


def _relocate_summary(result_dir, summary_file, summary):
    """Update the given summary with the path relative to the result_dir.

    @param result_dir: Path to the result directory.
    @param summary_file: Path to the summary file.
    @param summary: A directory summary inside the given result_dir or its
            sub-directory.
    @return: An updated summary with the path relative to the result_dir.
    """
    sub_path = os.path.dirname(summary_file).replace(
            result_dir.rstrip(os.sep), '')
    if sub_path == '':
        return summary
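
    # Walk-through with a hypothetical example: if result_dir is `/results/`
    # and summary_file is `/results/sub1/sub2/dir_summary_1.json`, then
    # sub_path is `/sub1/sub2` and folders below is ['', 'sub1', 'sub2'].
    # The loop rebuilds the chain ROOT_DIR -> sub1 -> sub2 and attaches the
    # files of the loaded summary under sub2, so the returned summary is
    # rooted at result_dir.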
    folders = sub_path.split(os.sep)

    # The first folder is always '' because of the leading `/` in sub_path.
    parent = result_info.ResultInfo(
            result_dir, utils_lib.ROOT_DIR, parent_result_info=None)
    root = parent

    # Skip the first, empty folder so that root contains only the single
    # utils_lib.ROOT_DIR folder.
    for i in range(1, len(folders)):
        child = result_info.ResultInfo(
                parent.path, folders[i], parent_result_info=parent)
        if i == len(folders) - 1:
            # Add files in summary to child.
            for info in summary.files:
                child.files.append(info)

        parent.files.append(child)
        parent = child

    parent.update_sizes()
    return root


def merge_summaries(path):
    """Merge all directory summaries in the given path.

    This function calculates the total size of result files collected from the
    test device and of the files generated on the drone. It also returns the
    merged directory summary.

    @param path: A path to search for directory summaries.
    @return: A tuple of (client_collected_bytes, merged_summary, files):
            client_collected_bytes: The total size of results collected from
                    the DUT. The number can be larger than the total file size
                    of the given path, as files can be overwritten or removed.
            merged_summary: The merged directory summary of the given path.
            files: All summary files in the given path, including
                    sub-directories.
    """
    path = _preprocess_result_dir_path(path)
    # Find all directory summary files and sort them by modification time.
    summary_files = []
    for root, _, filenames in os.walk(path):
        for filename in fnmatch.filter(filenames, SUMMARY_FILE_PATTERN):
            summary_files.append(os.path.join(root, filename))
    summary_files = sorted(summary_files, key=os.path.getmtime)

    all_summaries = []
    for summary_file in summary_files:
        try:
            summary = result_info.load_summary_json_file(summary_file)
            summary = _relocate_summary(path, summary_file, summary)
            all_summaries.append(summary)
        except (IOError, ValueError) as e:
            utils_lib.LOG('Failed to load summary file %s. Error: %s' %
                          (summary_file, e))

    # Merge all summaries.
    merged_summary = all_summaries[0] if len(all_summaries) > 0 else None
    for summary in all_summaries[1:]:
        merged_summary.merge(summary)
    # After all summaries from the test device (client side) are merged, we
    # can get the total size of result files being transferred from the test
    # device. If there is no directory summary collected, default
    # client_collected_bytes to 0.
    client_collected_bytes = 0
    if merged_summary:
        client_collected_bytes = merged_summary.collected_size

    # Get the summary of the current directory.
    last_summary = result_info.ResultInfo.build_from_path(path)

    if merged_summary:
        merged_summary.merge(last_summary, is_final=True)
        _delete_missing_entries(merged_summary, last_summary)
    else:
        merged_summary = last_summary

    return client_collected_bytes, merged_summary, summary_files


def _throttle_results(summary, max_result_size_KB):
    """Throttle the test results by limiting them to the given maximum size.

    @param summary: A ResultInfo object containing the result summary.
    @param max_result_size_KB: Maximum test result size in KB.
    """
    if throttler_lib.check_throttle_limit(summary, max_result_size_KB):
        utils_lib.LOG(
                'Result size is %s, which is less than %d KB. No need to '
                'throttle.' %
                (utils_lib.get_size_string(summary.trimmed_size),
                 max_result_size_KB))
        return

    args = {'summary': summary,
            'max_result_size_KB': max_result_size_KB}
    args_skip_autotest_log = copy.copy(args)
    args_skip_autotest_log['skip_autotest_log'] = True
    # Apply the throttlers in the following order.
    throttlers = [
            (shrink_file_throttler, copy.copy(args_skip_autotest_log)),
            (zip_file_throttler, copy.copy(args_skip_autotest_log)),
            (shrink_file_throttler, copy.copy(args)),
            (dedupe_file_throttler, copy.copy(args)),
            (zip_file_throttler, copy.copy(args)),
            ]
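
    # The ordering above aims to lose as little debugging information as
    # possible (an inference from the throttler names, not a documented
    # contract): shrink and compress while skipping autotest logs first, then
    # shrink, dedupe and compress all files including those logs.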

    # Add another zip_file_throttler to compress the files being shrunk.
    # The threshold is set to half of the DEFAULT_FILE_SIZE_LIMIT_BYTE of
    # shrink_file_throttler.
    new_args = copy.copy(args)
    new_args['file_size_threshold_byte'] = 50 * 1024
    throttlers.append((zip_file_throttler, new_args))

    # If the above throttlers still can't reduce the result size to be under
    # max_result_size_KB, try to delete files with various thresholds,
    # starting at 5MB then lowering to 100KB.
    delete_file_thresholds = [5*1024*1024, 1*1024*1024, 100*1024]
    # Try to keep tgz files first.
    exclude_file_patterns = [r'.*\.tgz']
    for threshold in delete_file_thresholds:
        new_args = copy.copy(args)
        new_args.update({'file_size_threshold_byte': threshold,
                         'exclude_file_patterns': exclude_file_patterns})
        throttlers.append((delete_file_throttler, new_args))
    # Add one more delete_file_throttler that does not skip tgz files.
    new_args = copy.copy(args)
    new_args.update({'file_size_threshold_byte': delete_file_thresholds[-1]})
    throttlers.append((delete_file_throttler, new_args))

    # Run the throttlers in order until the result size is under
    # max_result_size_KB.
    old_size = summary.trimmed_size
    for throttler, args in throttlers:
        try:
            args_without_summary = copy.copy(args)
            del args_without_summary['summary']
            utils_lib.LOG('Applying throttler %s, args: %s' %
                          (throttler.__name__, args_without_summary))
            throttler.throttle(**args)
            if throttler_lib.check_throttle_limit(summary, max_result_size_KB):
                return
        except Exception:
            utils_lib.LOG('Failed to apply throttler %s. Exception: %s' %
                          (throttler, traceback.format_exc()))
        finally:
            new_size = summary.trimmed_size
            if new_size == old_size:
                utils_lib.LOG('Result size was not changed: %s.' % old_size)
            else:
                utils_lib.LOG('Result size was reduced from %s to %s.' %
                              (utils_lib.get_size_string(old_size),
                               utils_lib.get_size_string(new_size)))
                # Track the size per iteration so each log line reflects the
                # current throttler's effect only.
                old_size = new_size


def _setup_logging():
    """Set up logging to direct logs to stdout."""
    # Direct logging to stdout.
    logger = logging.getLogger()
    logger.setLevel(logging.DEBUG)
    handler = logging.StreamHandler(sys.stdout)
    handler.setLevel(logging.DEBUG)
    formatter = logging.Formatter('%(asctime)s %(message)s')
    handler.setFormatter(formatter)
    logger.handlers = []
    logger.addHandler(handler)


def _parse_options():
    """Options for the main script.

    @return: An option object containing arg values.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-p', type=str, dest='path',
                        help='Path to build directory summary.')
    parser.add_argument('-m', type=int, dest='max_size_KB', default=0,
                        help='Maximum result size in KB. Set to 0 to disable '
                             'result throttling.')
    parser.add_argument('-d', action='store_true', dest='delete_summaries',
                        default=False,
                        help='-d to delete all result summary files in the '
                             'given path.')
    return parser.parse_args()


def execute(path, max_size_KB):
    """Execute the script with given arguments.

    @param path: Path to build directory summary.
    @param max_size_KB: Maximum result size in KB.
    """
    utils_lib.LOG('Running result_tools/utils on path: %s' % path)
    if max_size_KB > 0:
        utils_lib.LOG('Throttle result size to: %s' %
                      utils_lib.get_size_string(max_size_KB * 1024))

    result_dir = path
    if not os.path.isdir(result_dir):
        result_dir = os.path.dirname(result_dir)
    summary = result_info.ResultInfo.build_from_path(path)
    summary_json = json.dumps(summary)
    summary_file = get_unique_dir_summary_file(result_dir)
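
    # Note: ResultInfo is dict-like, which is why json.dumps(summary) above
    # serializes it directly without a custom encoder.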

    # Make sure there is enough free disk to write the file.
    stat = os.statvfs(path)
    free_space = stat.f_frsize * stat.f_bavail
    if free_space - len(summary_json) < MIN_FREE_DISK_BYTES:
        raise utils_lib.NotEnoughDiskError(
                'Not enough disk space after saving the summary file. '
                'Available free disk: %s bytes. Summary file size: %s bytes.' %
                (free_space, len(summary_json)))

    with open(summary_file, 'w') as f:
        f.write(summary_json)
    utils_lib.LOG('Directory summary of %s is saved to file %s.' %
                  (path, summary_file))
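
    # Throttling is skipped with probability max_result_size / current_size:
    # a result only slightly over the limit is often left intact, while a
    # heavily oversized result is almost always throttled.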
    if max_size_KB > 0 and summary.trimmed_size > 0:
        old_size = summary.trimmed_size
        throttle_probability = float(max_size_KB * 1024) / old_size
        if random.random() < throttle_probability:
            utils_lib.LOG(
                    'Skip throttling %s: size=%s, throttle_probability=%s' %
                    (path, old_size, throttle_probability))
        else:
            _throttle_results(summary, max_size_KB)
            if summary.trimmed_size < old_size:
                # Files are throttled, save the updated summary file.
                utils_lib.LOG('Overwrite the summary file: %s' % summary_file)
                result_info.save_summary(summary, summary_file)


def _delete_summaries(path):
    """Delete all directory summary files in the given directory.

    This is to clean up the directory so no summary files are left behind to
    affect later tests.

    @param path: Path to the directory to clean up.
    """
    # Only summary files directly under the `path` need to be cleaned.
    summary_files = glob.glob(os.path.join(path, SUMMARY_FILE_PATTERN))
    for summary in summary_files:
        try:
            # os.remove raises OSError, not IOError, on failure.
            os.remove(summary)
        except OSError as e:
            utils_lib.LOG('Failed to delete summary: %s. Error: %s' %
                          (summary, e))


def main():
    """Main script."""
    _setup_logging()
    options = _parse_options()
    if options.delete_summaries:
        _delete_summaries(options.path)
    else:
        execute(options.path, options.max_size_KB)


if __name__ == '__main__':
    main()