You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
307 lines
9.5 KiB
307 lines
9.5 KiB
# Copyright 2017 The PDFium Authors. All rights reserved.
|
|
# Use of this source code is governed by a BSD-style license that can be
|
|
# found in the LICENSE file.
|
|
"""Compares pairs of page images and generates an HTML to look at differences.
|
|
"""
|
|
|
|
import functools
|
|
import glob
|
|
import multiprocessing
|
|
import os
|
|
import re
|
|
import subprocess
|
|
import sys
|
|
import webbrowser
|
|
|
|
# pylint: disable=relative-import
|
|
from common import DirectoryFinder
|
|
|
|
|
|
def GenerateOneDiffParallel(image_comparison, image):
  """Runs GenerateOneDiff() on an image comparison object for a single image.

  This is a plain module-level function (rather than a bound method) so that it
  can be combined with functools.partial and handed to a multiprocessing pool
  as the worker callable.

  Args:
    image_comparison: Object providing a GenerateOneDiff(image) method.
    image: Name of the page image to compare.

  Returns:
    Whatever image_comparison.GenerateOneDiff(image) returns.
  """
  return image_comparison.GenerateOneDiff(image)
|
|
|
|
|
|
class ImageComparison(object):
  """Compares pairs of page images and generates an HTML to look at differences.

  The images are all assumed to have the same name and be in two directories:
  [output_path]/[two_labels[0]] and [output_path]/[two_labels[1]]. For example,
  if output_path is "/tmp/images" and two_labels is ("before", "after"),
  images in /tmp/images/before will be compared to /tmp/images/after. The HTML
  produced will be in /tmp/images/compare.html and have relative links to these
  images, so /tmp/images is self-contained and can be moved around or shared.
  """

  # Matches the first decimal number (e.g. "12.3456") in the diff tool output.
  _PERCENTAGE_RE = re.compile(r'\d+\.\d+')

  def __init__(self, build_dir, output_path, two_labels, num_workers,
               threshold_fraction):
    """Constructor.

    Args:
      build_dir: Path to the build directory.
      output_path: Path with the pngs and where the html will be created.
      two_labels: Tuple of two strings that name the subdirectories in
          output_path containing the images.
      num_workers: Number of worker processes to start.
      threshold_fraction: Fraction (0.0 to 1.0) of changed pixels at or below
          which an image is considered to have only small changes. Such
          images will not be displayed on the HTML, only listed.
    """
    self.build_dir = build_dir
    self.output_path = output_path
    self.two_labels = two_labels
    self.num_workers = num_workers
    # Diffs are reported as percentages, so keep the threshold in that unit.
    self.threshold = threshold_fraction * 100

  def Run(self, open_in_browser):
    """Runs the comparison and generates an HTML with the results.

    Args:
      open_in_browser: Whether to open the generated HTML in a web browser.

    Returns:
      Exit status: 0 on success, 1 if two_labels does not have two entries.
    """

    # Running a test defines a number of attributes on the fly.
    # pylint: disable=attribute-defined-outside-init

    if len(self.two_labels) != 2:
      # sys.stderr.write() behaves the same on Python 2 and Python 3, unlike
      # the "print >>" statement.
      sys.stderr.write('two_labels must be a tuple of length 2\n')
      return 1

    finder = DirectoryFinder(self.build_dir)
    self.img_diff_bin = finder.ExecutablePath('pdfium_diff')

    html_path = os.path.join(self.output_path, 'compare.html')

    self.diff_path = os.path.join(self.output_path, 'diff')
    if not os.path.exists(self.diff_path):
      os.makedirs(self.diff_path)

    self.image_locations = ImageLocations(self.output_path, self.diff_path,
                                          self.two_labels)

    difference = self._GenerateDiffs()

    small_changes = []

    with open(html_path, 'w') as f:
      f.write('<html><body>')
      f.write('<table>')
      for image in self.image_locations.Images():
        diff = difference[image]
        if diff is None:
          sys.stderr.write('Failed to compare image %s\n' % image)
        elif diff > self.threshold:
          self._WriteImageRows(f, image, diff)
        else:
          small_changes.append((image, diff))
      self._WriteSmallChanges(f, small_changes)
      f.write('</table>')
      f.write('</body></html>')

    if open_in_browser:
      webbrowser.open(html_path)

    return 0

  def _GenerateDiffs(self):
    """Runs a diff over all pairs of page images, producing diff images.

    As a side effect, the diff images will be saved to [output_path]/diff
    with the same image name.

    Returns:
      A dict mapping image names to the percentage of pixels changed (None for
      images that could not be compared).
    """
    pool = multiprocessing.Pool(self.num_workers)
    worker_func = functools.partial(GenerateOneDiffParallel, self)

    try:
      # The timeout is a workaround for http://bugs.python.org/issue8296
      # which prevents KeyboardInterrupt from working.
      one_year_in_seconds = 3600 * 24 * 365
      worker_results = pool.map_async(
          worker_func,
          self.image_locations.Images()).get(one_year_in_seconds)
    except KeyboardInterrupt:
      pool.terminate()
      sys.exit(1)
    except Exception:
      # Do not leave worker processes behind when a worker raised.
      pool.terminate()
      pool.join()
      raise
    else:
      pool.close()

    pool.join()

    # Each worker result is an (image, diff) tuple.
    return dict(worker_results)

  @classmethod
  def _ParsePercentageChange(cls, output):
    """Extracts the first decimal number from the diff tool output.

    Args:
      output: Text printed by the diff tool; may be empty or None.

    Returns:
      The number as a float, or None if the output contains no decimal number.
    """
    match = cls._PERCENTAGE_RE.search(output or '')
    return float(match.group(0)) if match else None

  def GenerateOneDiff(self, image):
    """Runs a diff over one pair of images, producing a diff image.

    As a side effect, the diff image will be saved to [output_path]/diff
    with the same image name.

    Args:
      image: Page image to compare.

    Returns:
      A tuple (image, diff), where image is the parameter and diff is the
      percentage of pixels changed. diff is 0 when the diff tool exits
      successfully, and None when the tool fails without reporting a
      percentage in its output.
    """
    left = self.image_locations.Left(image)
    right = self.image_locations.Right(image)

    try:
      # universal_newlines=True makes the captured output text on both
      # Python 2 and Python 3, so it can be matched with a str pattern.
      subprocess.check_output([self.img_diff_bin, left, right],
                              universal_newlines=True)
    except subprocess.CalledProcessError as e:
      # A non-zero exit status is how the tool signals that the images differ.
      percentage_change = self._ParsePercentageChange(e.output)
    else:
      return image, 0

    if percentage_change is None:
      # The tool failed for a reason other than a reported difference; let the
      # caller report the failure instead of crashing the worker.
      return image, None

    try:
      subprocess.check_output([
          self.img_diff_bin, '--diff', left, right,
          self.image_locations.Diff(image)
      ])
    except subprocess.CalledProcessError:
      return image, percentage_change
    else:
      sys.stderr.write('Warning: Should have failed the previous diff.\n')
      return image, 0

  def _GetRelativePath(self, absolute_path):
    """Returns absolute_path expressed relative to output_path."""
    return os.path.relpath(absolute_path, start=self.output_path)

  def _WriteImageRows(self, f, image, diff):
    """Write table rows for a page image comparing its two versions.

    Args:
      f: Open HTML file to write to.
      image: Image file name.
      diff: Percentage of different pixels.
    """
    f.write('<tr><td colspan="2">')
    f.write('%s (%.4f%% changed)' % (image, diff))
    f.write('</td></tr>')

    f.write('<tr>')
    self._WritePageCompareTd(
        f, self._GetRelativePath(self.image_locations.Left(image)),
        self._GetRelativePath(self.image_locations.Right(image)))
    self._WritePageTd(f, self._GetRelativePath(
        self.image_locations.Diff(image)))
    f.write('</tr>')

  def _WritePageTd(self, f, image_path):
    """Write table column with a single image.

    Args:
      f: Open HTML file to write to.
      image_path: Path to image file.
    """
    f.write('<td>')
    f.write('<img src="%s">' % image_path)
    f.write('</td>')

  def _WritePageCompareTd(self, f, normal_image_path, hover_image_path):
    """Write table column for an image comparing its two versions.

    Args:
      f: Open HTML file to write to.
      normal_image_path: Path to image to be used in the "normal" state.
      hover_image_path: Path to image to be used in the "hover" state.
    """
    f.write('<td>')
    f.write('<img src="%s" '
            'onmouseover="this.src=\'%s\';" '
            'onmouseout="this.src=\'%s\';">' %
            (normal_image_path, hover_image_path, normal_image_path))
    f.write('</td>')

  def _WriteSmallChanges(self, f, small_changes):
    """Write table rows for all images considered to have only small changes.

    Args:
      f: Open HTML file to write to.
      small_changes: List of (image, change) tuples, where image is the page
          image and change is the percentage of pixels changed.
    """
    for image, change in small_changes:
      f.write('<tr><td colspan="2">')
      if not change:
        f.write('No change for: %s' % image)
      else:
        f.write('Small change of %.4f%% for: %s' % (change, image))
      f.write('</td></tr>')
|
|
|
|
|
|
class ImageLocations(object):
  """Contains the locations of input and output image files."""

  def __init__(self, output_path, diff_path, two_labels):
    """Constructor.

    Args:
      output_path: Path to directory with the pngs.
      diff_path: Path to directory where the diffs will be generated.
      two_labels: Tuple of two strings that name the subdirectories in
          output_path containing the images.
    """
    self.output_path = output_path
    self.diff_path = diff_path
    self.two_labels = two_labels

    self.left = self._FindImages(self.two_labels[0])
    self.right = self._FindImages(self.two_labels[1])

    # Only images present in both directories can be compared. A set
    # intersection is used because dict.viewkeys() only exists in Python 2.
    common_images = set(self.left) & set(self.right)

    # Sort by pdf filename, then page number.
    self.images = sorted(common_images, key=self._SortKey)
    self.diff = {
        image: os.path.join(self.diff_path, image) for image in self.images
    }

  @staticmethod
  def _SortKey(image):
    """Returns the sort key for an image name shaped like [name].[page].png.

    Names whose middle component is not a number are ordered after the
    numbered pages of the same file instead of raising ValueError.

    Args:
      image: Image file name containing at least two dots.

    Returns:
      A tuple that orders images by file name and then by page number.
    """
    name, page, _ = image.rsplit('.', 2)
    try:
      return (name, 0, int(page), page)
    except ValueError:
      return (name, 1, 0, page)

  def _FindImages(self, label):
    """Traverses a dir and builds a dict of all page images to compare in it.

    Args:
      label: name of subdirectory of output_path to traverse.

    Returns:
      Dict mapping page image names to the path of the image file.
    """
    image_path_matcher = os.path.join(self.output_path, label, '*.*.png')
    return {
        os.path.basename(image_path): image_path
        for image_path in glob.glob(image_path_matcher)
    }

  def Images(self):
    """Returns a list of all page images present in both directories."""
    return self.images

  def Left(self, test_case):
    """Returns the path for a page image in the first subdirectory."""
    return self.left[test_case]

  def Right(self, test_case):
    """Returns the path for a page image in the second subdirectory."""
    return self.right[test_case]

  def Diff(self, test_case):
    """Returns the path for a page diff image."""
    return self.diff[test_case]
|