#!/usr/bin/env python2
# Copyright 2016 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""
This module is used to upload csv files generated by performance related tests
to cns. More details about the implementation can be found in crbug.com/598504.

The overall work flow is as follows.
1. Query tko_test_attributes table for perf_csv_folder attribute. The attribute
contains a path to csv files that need to be uploaded to cns.
2. Filter the perf_csv_folder attributes only for test jobs that finished at
least an hour ago. This is to make sure the results have already been uploaded
to GS.
3. Locate the csv files in GS, and upload them to desired cns location.

After every run, the script saves the test attribute id to resume scanning from
to a local file, and repeats the workflow.

"""

import argparse
import datetime
import logging
import os
import shutil
import tempfile
import time

import common
from autotest_lib.client.bin import utils
from autotest_lib.client.common_lib import logging_config
from autotest_lib.client.common_lib.cros import retry
from autotest_lib.frontend import setup_django_environment
from autotest_lib.frontend.tko import models as tko_models


# Number of hours that a test has to be finished for the script to process.
# This allows gs_offloader to have enough time to upload the results to GS.
CUTOFF_TIME_HOURS = 1

# Default wait time in seconds after each run.
DEFAULT_INTERVAL_SEC = 60

# Timeout in minutes for upload attempts for a given folder.
UPLOAD_TIMEOUT_MINS = 5


class CsvNonexistenceException(Exception):
    """Exception raised when csv files not found in GS."""


class CsvFolder(object):
    """A class contains the information of a folder storing csv files to be
    uploaded, and logic to upload the csv files.
    """

    # A class variable whose value is the GoogleStorage path to the test
    # results. It is assigned by get_options() from the --gs_path flag.
    gs_path = None

    # A class variable whose value is the cns path to upload the csv files to.
    # It is assigned by get_options() from the --cns_path flag.
    cns_path = None

    def __init__(self, test_attribute_id, perf_csv_folder, test_view):
        """Initialize a CsvFolder object.

        @param test_attribute_id: ID of test attribute record.
        @param perf_csv_folder: Path of the folder contains csv files in test
                results. It's the value of perf_csv_folder attribute from
                tko_test_attributes table.
        @param test_view: A db object from querying tko_test_view_2 for the
                related tko_test_attributes.
        """
        self.test_attribute_id = test_attribute_id
        self.perf_csv_folder = perf_csv_folder
        self.test_view = test_view


    def __str__(self):
        return '%s:%s:%s' % (self.test_view.job_name, self.test_view.job_tag,
                             self.perf_csv_folder)


    def _get_url(self):
        """Get the url to the folder storing csv files in GS.

        The url is the class-level gs_path joined with the job tag of the
        test view, for example:
        gs://chromeos-autotest-results/123-chromeos-test/host1/
        gsutil is used to download the csv files with this gs url.

        @return: The GS url of the job's result folder.
        """
        return os.path.join(self.gs_path, self.test_view.job_tag)


    def _download(self, dest_dir):
        """Download the csv files of this folder to the given dest_dir.

        @param dest_dir: A directory to store the downloaded csv files.

        @raise CsvNonexistenceException: If no csv file found in the GS.
        """
        gs_url = self._get_url()
        # Find all csv files in given GS url recursively. ignore_status is set
        # so that an empty grep result does not raise; it is detected below.
        listing = utils.run('gsutil ls -r %s | grep -e .*\\\\.csv$' % gs_url,
                            ignore_status=True).stdout
        files = [line for line in listing.strip().split('\n') if line]
        if not files:
            # Format the message here; exceptions do not apply %-arguments.
            raise CsvNonexistenceException(
                    'No csv file found in %s' % gs_url)

        # Copy files from GS to dest_dir.
        for f in files:
            utils.run('gsutil cp %s %s' % (f, dest_dir))


    # NOTE(review): the decorator is configured to retry on Exception for up to
    # UPLOAD_TIMEOUT_MINS, with CsvNonexistenceException in raiselist
    # (presumably raised immediately without retry - confirm in retry.retry).
    @retry.retry(Exception, raiselist=[CsvNonexistenceException],
                 timeout_min=UPLOAD_TIMEOUT_MINS)
    def upload(self):
        """Upload the folder to cns.

        The csv files are first downloaded from GS into a temporary directory,
        then copied to cns. The temporary directory is always removed.

        @raise CsvNonexistenceException: If no csv file found in the GS.
        """
        temp_dir = tempfile.mkdtemp(suffix='perf_csv')
        try:
            self._download(temp_dir)
            files = os.listdir(temp_dir)
            # File in cns is stored under folder with format of:
            # <cns_path>/<test_name>/<hostname>/YYYY/mm/dd/hh/mm
            finished = self.test_view.job_finished_time
            time_parts = [str(finished.year)]
            time_parts.extend(
                    str(value).zfill(2)
                    for value in (finished.month, finished.day,
                                  finished.hour, finished.minute))
            path_in_cns = os.path.join(
                    self.cns_path, self.test_view.test_name,
                    self.test_view.hostname, *time_parts)
            utils.run('fileutil mkdir -p %s' % path_in_cns)
            for f in files:
                utils.run('fileutil copytodir -f %s %s' %
                          (os.path.join(temp_dir, f), path_in_cns))
        finally:
            shutil.rmtree(temp_dir)


class DBScanner(object):
    """Class contains the logic to query tko_test_attributes table for
    new perf_csv_folder attributes and create CsvFolder object for each
    new perf_csv_folder attribute.
    """

    # Minimum test_attribute id for querying tko_test_attributes table.
    min_test_attribute_id = -1

    @classmethod
    def get_perf_csv_folders(cls):
        """Query tko_test_attributes table for new entries of perf_csv_folder.

        Attributes whose job finished less than CUTOFF_TIME_HOURS ago are
        skipped, as are attributes that have no matching test view record.

        @return: A list of CsvFolder objects for each new entry of
                 perf_csv_folder attribute in tko_test_attributes table.
        """
        attributes = tko_models.TestAttribute.objects.filter(
                attribute='perf_csv_folder',
                id__gte=cls.min_test_attribute_id)
        folders = []

        cutoff_time = (datetime.datetime.now() -
                       datetime.timedelta(hours=CUTOFF_TIME_HOURS))
        for attribute in attributes:
            # Evaluate the query once, so the view is not looked up again for
            # every index access and an empty result can be detected.
            test_views = list(tko_models.TestView.objects.filter(
                    test_idx=attribute.test_id))
            if not test_views:
                logging.warning('No test view found for test attribute %s '
                                '(test id %s), skipping.',
                                attribute.id, attribute.test_id)
                continue
            test_view = test_views[0]
            if test_view.job_finished_time > cutoff_time:
                continue
            folders.append(CsvFolder(attribute.id, attribute.value,
                                     test_view))
        return folders


def setup_logging(log_dir):
    """Setup logging information.

    @param log_dir: Path to the directory storing logs of this script.
    """
    config = logging_config.LoggingConfig()
    logfile = os.path.join(os.path.abspath(log_dir), 'perf_csv_uploader.log')
    config.add_file_handler(file_path=logfile, level=logging.DEBUG)


def save_min_test_attribute_id(test_attribute_id_file):
    """Save the minimum test attribute id to a cached file.

    @param test_attribute_id_file: Path to the file storing the value of
            min_test_attribute_id.
    """
    with open(test_attribute_id_file, 'w') as f:
        f.write(str(DBScanner.min_test_attribute_id))


def get_min_test_attribute_id(test_attribute_id_file):
    """Get the minimum test attribute id from a cached file.

    @param test_attribute_id_file: Path to the file storing the value of
            min_test_attribute_id.

    @return: The cached id as an integer, or -1 if the file cannot be read or
             does not contain an integer.
    """
    try:
        with open(test_attribute_id_file, 'r') as f:
            return int(f.read())
    except (IOError, ValueError):
        # min_test_attribute_id has not been set, or the cached file is empty
        # or corrupted. Default to -1.
        return -1


def get_options():
    """Get the command line options.

    As a side effect, CsvFolder.gs_path and CsvFolder.cns_path are set from
    the parsed options.

    @return: Command line options of the script.
    """
    parser = argparse.ArgumentParser()
    # All three options are marked required: the script cannot build any path
    # without them and would otherwise fail later on a None value.
    parser.add_argument('--gs_path', type=str, dest='gs_path', required=True,
                        help='GoogleStorage path that stores test results.')
    parser.add_argument('--cns_path', type=str, dest='cns_path',
                        required=True,
                        help='cns path to where csv files are uploaded to.')
    parser.add_argument('--log_dir', type=str, dest='log_dir', required=True,
                        help='Directory used to store logs.')

    options = parser.parse_args()
    CsvFolder.gs_path = options.gs_path
    CsvFolder.cns_path = options.cns_path

    return options


def main():
    """Main process to repeat the workflow of searching/uploading csv files.
    """
    options = get_options()
    setup_logging(options.log_dir)
    test_attribute_id_file = os.path.join(options.log_dir,
                                          'perf_csv_uploader_test_attr_id')
    DBScanner.min_test_attribute_id = get_min_test_attribute_id(
            test_attribute_id_file)

    while True:
        folders = DBScanner.get_perf_csv_folders()
        if not folders:
            logging.info('No new folders found. Wait...')
            time.sleep(DEFAULT_INTERVAL_SEC)
            continue

        failed_folders = []
        for folder in folders:
            try:
                logging.info('Uploading folder: %s', folder)
                folder.upload()
            except CsvNonexistenceException:
                # Ignore the failure if CSV files are not found in GS, but
                # leave a trace in the log.
                logging.warning('No csv files found in GS for folder %s, '
                                'skipping.', folder)
            except Exception as e:
                failed_folders.append(folder)
                logging.error('Failed to upload folder %s, error: %s',
                              folder, e)

        if failed_folders:
            # Set the min_test_attribute_id to be the smallest one that failed
            # to upload, so the next scan starts from it again.
            min_test_attribute_id = min(
                    folder.test_attribute_id for folder in failed_folders)
        else:
            # Everything was handled; resume after the largest processed id.
            min_test_attribute_id = max(
                    folder.test_attribute_id for folder in folders) + 1
        if DBScanner.min_test_attribute_id != min_test_attribute_id:
            DBScanner.min_test_attribute_id = min_test_attribute_id
            save_min_test_attribute_id(test_attribute_id_file)


if __name__ == '__main__':
    main()