#!/usr/bin/env python3
#
# Copyright 2018 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#

"""A tool for running diffing tools and measuring patch sizes."""

import argparse
import logging
import os
import subprocess
import sys
import tempfile


class Error(Exception):
    """Puffin general processing error."""


def ParseArguments(argv):
    """Parses and validates command line arguments.

    Args:
      argv: command line arguments to parse (without the program name).

    Returns:
      The parsed arguments namespace with |src_corpus|, |tgt_corpus| and
      |debug| attributes.

    Raises:
      Error: if either corpus option is missing or does not name an existing
        directory.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--src-corpus', metavar='DIR',
                        help='The source corpus directory with compressed '
                             'files.')
    parser.add_argument('--tgt-corpus', metavar='DIR',
                        help='The target corpus directory with compressed '
                             'files.')
    parser.add_argument('--debug', action='store_true',
                        help='Turns on verbosity.')

    # Parse command-line arguments.
    args = parser.parse_args(argv)

    # Neither option is marked as required in argparse, so an omitted option
    # arrives here as None and is rejected together with bad paths.
    for corpus in (args.src_corpus, args.tgt_corpus):
        if not corpus or not os.path.isdir(corpus):
            raise Error('Corpus directory {} is non-existent or inaccessible'
                        .format(corpus))
    return args


def _ListFiles(corpus):
    """Returns the sorted paths of the regular files directly in |corpus|.

    Sorting makes the processing order independent of the arbitrary order in
    which os.listdir() yields directory entries.
    """
    candidates = (os.path.join(corpus, name) for name in os.listdir(corpus))
    return sorted(path for path in candidates if os.path.isfile(path))


def _RunDiffTool(operation, cmd, error_format):
    """Runs |cmd| and raises Error if it exits with a non-zero status.

    Args:
      operation: name of the diff operation, used in the error message.
      cmd: the command line to execute, as a list of strings.
      error_format: format string taking the operation and the command.

    Raises:
      Error: if the command returns a non-zero exit status.
    """
    if subprocess.call(cmd) != 0:
        raise Error(error_format.format(operation, cmd))


def main(argv):
    """The main function.

    For every file in the source corpus, generates a puffdiff patch and a
    bsdiff patch against the same-named file in the target corpus and logs the
    file and patch sizes.

    Args:
      argv: full command line, including the program name at index 0.

    Returns:
      0 on success.

    Raises:
      Error: if the arguments are invalid, a source file has no counterpart in
        the target corpus, or one of the diffing tools fails.
    """
    args = ParseArguments(argv[1:])

    if args.debug:
        logging.getLogger().setLevel(logging.DEBUG)

    # Construct list of appropriate files.
    src_files = _ListFiles(args.src_corpus)
    tgt_files = _ListFiles(args.tgt_corpus)

    # Check if all files in src_files have a target file in tgt_files.
    src_names = {os.path.basename(path) for path in src_files}
    tgt_names = {os.path.basename(path) for path in tgt_files}
    files_mismatch = sorted(src_names - tgt_names)
    if files_mismatch:
        raise Error('Target files {} do not exist in corpus: {}'
                    .format(files_mismatch, args.tgt_corpus))

    for src in src_files:
        tgt = os.path.join(args.tgt_corpus, os.path.basename(src))

        # The temporary files only provide paths for the tools to write the
        # patches to; they are removed when the with-block exits.
        with tempfile.NamedTemporaryFile() as puffdiff_patch, \
                tempfile.NamedTemporaryFile() as bsdiff_patch:
            # Running the puffdiff operation.
            operation = 'puffdiff'
            _RunDiffTool(operation,
                         ['puffin',
                          '--operation={}'.format(operation),
                          '--src_file={}'.format(src),
                          '--dst_file={}'.format(tgt),
                          '--patch_file={}'.format(puffdiff_patch.name)],
                         'Puffin failed to do {} command: {}')

            # Running the bsdiff operation.
            _RunDiffTool('bsdiff',
                         ['bsdiff', '--type', 'bz2', src, tgt,
                          bsdiff_patch.name],
                         'Failed to do {} command: {}')

            # NOTE: sizes are reported at DEBUG level, so they are only
            # emitted when --debug raised the root logger level above.
            logging.debug('%s(%d -> %d) : bsdiff(%d), puffdiff(%d)',
                          os.path.basename(src),
                          os.stat(src).st_size,
                          os.stat(tgt).st_size,
                          os.stat(bsdiff_patch.name).st_size,
                          os.stat(puffdiff_patch.name).st_size)

    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))