You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
107 lines
3.3 KiB
107 lines
3.3 KiB
#!/usr/bin/env python3
|
|
#
|
|
# Copyright 2018 The Chromium OS Authors. All rights reserved.
|
|
# Use of this source code is governed by a BSD-style license that can be
|
|
# found in the LICENSE file.
|
|
#
|
|
|
|
"""A tool for running diffing tools and measuring patch sizes."""
|
|
|
|
import argparse
|
|
import logging
|
|
import os
|
|
import subprocess
|
|
import sys
|
|
import tempfile
|
|
|
|
|
|
class Error(Exception):
  """Puffin general processing error.

  Raised by this script for invalid corpus directories, source files without
  a matching target file, and failed diffing tool invocations.
  """
|
|
|
|
|
|
def ParseArguments(argv):
  """Parses and validates command line arguments.

  Args:
    argv: command line arguments to parse (the program name must already be
        stripped; every element is handed to argparse as an argument).

  Returns:
    The parsed argparse namespace, exposing `src_corpus`, `tgt_corpus` and
    `debug`.

  Raises:
    Error: if either corpus option is missing or does not name an existing
        directory.
  """
  parser = argparse.ArgumentParser()

  parser.add_argument('--src-corpus', metavar='DIR',
                      help='The source corpus directory with compressed files.')
  parser.add_argument('--tgt-corpus', metavar='DIR',
                      help='The target corpus directory with compressed files.')
  parser.add_argument('--debug', action='store_true',
                      help='Turns on verbosity.')

  # Parse command-line arguments.
  args = parser.parse_args(argv)

  # Neither option is marked required for argparse, so an omitted flag arrives
  # here as None and is rejected by the same check as a bad path.
  for corpus in (args.src_corpus, args.tgt_corpus):
    if not corpus or not os.path.isdir(corpus):
      raise Error('Corpus directory {} is non-existent or inaccesible'
                  .format(corpus))
  return args
|
|
|
|
|
|
def _ListFiles(directory):
  """Returns the sorted paths of the regular files directly in |directory|."""
  # os.listdir() yields entries in arbitrary order; sorting keeps the
  # processing and logging order stable from run to run.
  return sorted(path
                for path in (os.path.join(directory, name)
                             for name in os.listdir(directory))
                if os.path.isfile(path))


def main(argv):
  """Runs puffdiff and bsdiff on every source/target pair and logs the sizes.

  For each regular file in the source corpus, the file with the same name in
  the target corpus is diffed against it with both `puffin` (puffdiff
  operation) and `bsdiff`. The resulting patch sizes are reported through
  logging at DEBUG level, so they are only visible when --debug is passed.

  Args:
    argv: the full command line; argv[0] (the program name) is skipped.

  Returns:
    0 once every file pair has been processed.

  Raises:
    Error: if the arguments are invalid, a source file has no counterpart in
        the target corpus, or one of the diffing commands exits non-zero.
  """
  args = ParseArguments(argv[1:])

  if args.debug:
    logging.getLogger().setLevel(logging.DEBUG)

  # Construct list of appropriate files.
  src_files = _ListFiles(args.src_corpus)
  tgt_names = set(map(os.path.basename, _ListFiles(args.tgt_corpus)))

  # Check if all files in src_files have a target file in the target corpus.
  files_mismatch = set(map(os.path.basename, src_files)) - tgt_names
  if files_mismatch:
    raise Error('Target files {} do not exist in corpus: {}'
                .format(files_mismatch, args.tgt_corpus))

  for src in src_files:
    # The temporary files only provide paths for the external tools to write
    # their patches to; both are removed when the `with` block exits.
    with tempfile.NamedTemporaryFile() as puffdiff_patch, \
         tempfile.NamedTemporaryFile() as bsdiff_patch:

      tgt = os.path.join(args.tgt_corpus, os.path.basename(src))

      operation = 'puffdiff'
      cmd = ['puffin',
             '--operation={}'.format(operation),
             '--src_file={}'.format(src),
             '--dst_file={}'.format(tgt),
             '--patch_file={}'.format(puffdiff_patch.name)]
      # Running the puffdiff operation
      if subprocess.call(cmd) != 0:
        raise Error('Puffin failed to do {} command: {}'
                    .format(operation, cmd))

      operation = 'bsdiff'
      cmd = ['bsdiff', '--type', 'bz2', src, tgt, bsdiff_patch.name]
      # Running the bsdiff operation
      if subprocess.call(cmd) != 0:
        raise Error('Failed to do {} command: {}'
                    .format(operation, cmd))

      # Sizes are read from disk by path because the patches were written by
      # the child processes, not through the Python file objects.
      logging.debug('%s(%d -> %d) : bsdiff(%d), puffdiff(%d)',
                    os.path.basename(src),
                    os.stat(src).st_size, os.stat(tgt).st_size,
                    os.stat(bsdiff_patch.name).st_size,
                    os.stat(puffdiff_patch.name).st_size)

  return 0
|
|
|
|
|
|
if __name__ == '__main__':
  # Use main()'s return value as the process exit status.
  sys.exit(main(sys.argv))
|