You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

2090 lines
62 KiB

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// -*- Mode: C++ -*-
//
// Copyright (C) 2013-2020 Red Hat, Inc.
/// @file
///
/// This file declares types and operations implementing the "O(ND)
/// Difference Algorithm" (aka diff2) from Eugene W. Myers, to compute
/// the difference between two sequences.
///
/// To understand what is going on here, one must read the paper at
/// http://www.xmailserver.org/diff2.pdf. Throughout this file, that
/// paper is referred to as "the paper".
///
/// The implementations goes as far as calculating the shortest edit
/// script (the set of insertions and deletions) for transforming a
/// sequence into another. The main entry point for that is the
/// compute_diff() function.
#ifndef __ABG_DIFF_UTILS_H__
#define __ABG_DIFF_UTILS_H__
#include <cassert>
#include <cstdlib>
#include <memory>
#include <ostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>
#include "abg-fwd.h"
namespace abigail
{
/// @brief Libabigail's core diffing algorithms
///
/// This is the namespace defining the core diffing algorithm used by
/// libabigail @ref comparison tools. This based on the diff
/// algorithm from Eugene Myers.
namespace diff_utils
{
using std::shared_ptr;
// Inject the names from std:: below into this namespace
using std::string;
using std::ostream;
using std::vector;
using std::abs;
using std::ostringstream;
/// A class representing a vertex in an edit graph, as explained in
/// the paper. A vertex is a basically a pair of coordinates
/// (abscissa and ordinate).
class point
{
int x_;
int y_;
bool empty_;
public:
point()
: x_(-1), y_(-1),empty_(true)
{}
point(int x, int y)
: x_(x), y_(y), empty_(false)
{}
point(const point& p)
: x_(p.x()), y_(p.y()), empty_(p.is_empty())
{}
int
x() const
{return x_;}
void
x(int x)
{
x_ = x;
empty_ = false;
}
int
y() const
{return y_;}
void
y(int y)
{
y_ = y;
empty_ = false;
}
void
set(int x, int y)
{
x_ = x;
y_ = y;
empty_ = false;
}
void
set(int x, int y, bool empty)
{
x_ = x;
y_ = y;
empty_ = empty;
}
void
add(int ax, int ay)
{set (x() + ax, y() + ay);}
bool
operator!=(const point& o) const
{return (x() != o.x() || y() != o.y() || is_empty() != o.is_empty());}
bool
operator==(const point& o) const
{return !(operator!=(o));}
bool
operator<(const point& o) const
{return (x() < o.x() && y() < o.y());}
bool
operator>(const point& o) const
{return (x() > o.x() && y() > o.y());}
bool
operator<=(const point& o) const
{return (x() <= o.x() && y() <= o.y());}
bool
operator>=(const point& o) const
{return (x() >= o.x() && y() >= o.y());}
point
operator+(int val) const
{return point(x() + val, y() + val);}
point
operator-(int val) const
{return point(x() - val, y() - val);}
point&
operator+= (int val)
{
set(x_ + val, y_ + val);
return *this;
}
point&
operator-= (int val)
{return (*this) += (-val);}
point&
operator--()
{return (*this) -= 1;}
point&
operator++()
{return (*this) += 1;}
point
operator--(int)
{
point tmp(*this);
--(*this);
return tmp;
}
point
operator++(int)
{
point tmp(*this);
++(*this);
return tmp;
}
point&
operator=(int val)
{
set(val, val);
return *this;
}
point&
operator=(const point& p)
{
set(p.x(), p.y(), p.is_empty());
return *this;
}
bool
is_empty() const
{return empty_;}
operator bool () const
{return !is_empty();}
bool
operator!() const
{return is_empty();}
void
clear()
{
x_ = -1;
y_ = -1;
empty_ = true;
}
};// end point
/// The abstraction of the Snake concept, from the paper.
///
/// In a given path from the edit graph, a snake is a non-diagonal
/// edge followed by zero or more diagonal edges.
///
/// The starting poing of the non-diagonal edge is the beginning of
/// the snake. This is given by the snake::begin() method. This point
/// is not explicitely referenced in the paper, but we need it for
/// some grunt implementation details of the algorithm.
///
/// The end point of the non-diagonal edge is the intermediate point
/// of the snake; it's given by the snake::intermediate() method.
/// This point is what is referred to as "the begining of the snake"
/// in the paper.
///
/// The end point of the first diagonal edge is given by the
/// snake::diagonal_start() method.
///
/// The end point of the last diagonal edge is given by the
/// snake::end() method. Note that when the snake contains no
/// diagonal edge, snake::intermediate(), and snake::end() return the
/// same point; snake::diagonal_start() contains an empty point (i.e,
/// a point for which point::is_empty() returns true).
class snake
{
point begin_, intermediate_, diagonal_start_, end_;
bool forward_;
public:
/// Default constructor for snake.
snake()
: forward_(false)
{}
/// Constructor from the beginning, intermediate and end points.
///
/// @param b the beginning point of the snake. That is, the
/// starting point of the non-diagonal edge.
///
/// @param i the intermediate point of the snake. That is, the end
/// point of the non-diagonal edge.
///
/// @param e the end point of the snake. That is the end point of
/// the last diagonal edge.
snake(const point& b,
const point& i,
const point& e)
: begin_(b), intermediate_(i),
end_(e), forward_(false)
{}
/// Constructor from the beginning, intermediate and end points.
///
/// @param b the beginning point of the snake. That is, the
/// starting point of the non-diagonal edge.
///
/// @param i the intermediate point of the snake. That is, the end
/// point of the non-diagonal edge.
///
/// @param d the beginning of the diagonal edge. That is the end of
/// the first diagonal edge of the snake.
///
/// @param e the end point of the snake. That is the end point of
/// the last diagonal edge.
snake(const point& b,
const point& i,
const point& d,
const point& e)
: begin_(b), intermediate_(i),
diagonal_start_(d), end_(e),
forward_(false)
{}
/// Getter for the starting point of the non-diagonal edge of the
/// snake.
///
/// @return the starting point of the non-diagonal edge of the snake
const point&
begin() const
{return begin_;}
/// Getter for the starting point of the non-diagonal edge of the
/// snake, aka begin point.
///
///@param p the new begin point.
void
begin(const point& p)
{begin_ = p;}
/// Getter for the end point of the non-diagonal edge of the snake.
///
/// @return the end point of the non-diagonal edge of the snake
const point&
intermediate() const
{return intermediate_;}
/// Setter for the end point of the non-diagonal edge of the snake,
/// aka intermediate point.
///
/// @param p the new intermediate point.
void
intermediate(const point& p)
{intermediate_ = p;}
/// Getter for the end point of the first diagonal edge, aka
/// diagonal start point. Note that if the snake has no diagonal
/// edge, this point is empty.
///
/// @return the end point of the first diagonal edge.
const point&
diagonal_start() const
{return diagonal_start_;}
/// Setter for the end point of the first diagonal edge, aka
/// diagonal start point.
///
/// @param p the new diagonal start.d
void
diagonal_start(const point& p)
{diagonal_start_ = p;}
/// Getter for the end point of the last diagonal edge, aka snake
/// end point. Note that if the snake has no diagonal edge, this
/// point is equal to the intermediate point.
///
/// @return the end point of the last diagonal edge
const point&
end() const
{return end_;}
/// Setter for the end point of the last diagonal edge, aka snake
/// end point. Note that if the snake has no diagonal edge, this
/// point is equal to the intermediate point.
void
end(const point& p)
{end_ = p;}
/// Setter for the begin, intermediate and end points of the snake.
///
/// @param b the new snake begin point
///
/// @param i the new snake intermediate point
///
/// @param e the new snake end point
void
set(const point& b, const point&i, const point&e)
{
begin(b);
intermediate(i);
end(e);
}
/// Setter for the begin, intermediate, diagonal start and end points
/// of the snake.
///
/// @param b the new snake begin point
///
/// @param i the new snake intermediate point
///
/// @param d the new diagonal start point
///
/// @param e the new snake end point
void
set(const point& b, const point&i, const point& d, const point&e)
{
begin(b);
intermediate(i);
diagonal_start(d);
end(e);
}
/// @return true iff the snake is a forward snake. That is, if it
/// was built while walking the edit graph going forward (from the
/// top left corner to the right bottom corner.
bool
is_forward() const
{return forward_;}
/// Set to true if the snake is a forward snake; that is, if it was
/// built while walking the edit graph going forward (from the top
/// left corner to the right bottom corner. Set to false otherwise.
///
/// @param f whether the snake is a forward snake or not.
void
set_forward(bool f)
{forward_ = f;}
/// Add an offset to the abscissas of the points of the snake, and
/// add another offset to the ordinates of these same points.
///
/// @param x_offset the offset to add to the abscissas of all the
/// points of the snake.
///
/// @param y_offset the offset to add to the ordinates of all the
/// points of the snake.
void
add(int x_offset, int y_offset)
{
if (is_empty())
return;
begin_.add(x_offset, y_offset);
intermediate_.add(x_offset, y_offset);
if (diagonal_start_)
diagonal_start_.add(x_offset, y_offset);
end_.add(x_offset, y_offset);
}
/// @return true iff the snake has at least one diagonal edge.
bool
has_diagonal_edge() const
{return !diagonal_start().is_empty();}
/// @return true iff the non-diagonal edge is horizontal.
bool
has_horizontal_edge() const
{return (begin().y() == intermediate().y());}
/// @return true iff the non-diagonal edge is vertical.
bool
has_vertical_edge() const
{return (begin().x() == intermediate().x());}
/// @return true iff the snake is empty, that is, if all the points
/// it contains are empty.
bool is_empty() const
{return begin().is_empty() && intermediate().is_empty() && end().is_empty();}
};// end class snake
/// The array containing the furthest D-path end-points, for each value
/// of K. MAX_D is the maximum value of the D-Path. That is, M+N if
/// M is the size of the first input string, and N is the size of the
/// second.
class d_path_vec : public std::vector<int>
{
private:
unsigned a_size_;
unsigned b_size_;
/// Forbid vector size modifications
void
push_back(const vector<int>::value_type&);
/// Forbid default constructor.
d_path_vec();
bool
over_bounds(long long index) const
{return (index + offset()) >= (long long) size();}
void
check_index_against_bound(int index) const
{
if (over_bounds(index))
{
ostringstream o;
o << "index '" << index
<< "' out of range [-" << max_d() << ", " << max_d() << "]";
throw std::out_of_range(o.str());
}
}
public:
/// Constructor of the d_path_vec.
///
/// For forward vectors, the underlying vector allocates 2 *
/// [MAX_D+1].
/// space, so that one can address elements in the index range
/// [-MAX_D, MAX_D]. And MAX_D is the sum of the two sequence
/// sizes. delta is the difference.
///
/// For reverse vectors, note that we need to be able to address
/// [-MAX_D - delta, MAX_D + delta], with delta being the (signed)
/// difference between the size of the two sequences. We consider
/// delta being bounded by MAX_D itself; so we say we need to be
/// able to address [-2MAX_D, 2MAX_D].
///
/// @param size1 the size of the first sequence we are interested
/// in.
///
/// @param size2 the size of the second sequence we are interested
/// in.
d_path_vec(unsigned size1, unsigned size2)
: vector<int>(2 * (size1 + size2 + 1 + (size1 + size2)) + 1, 0),
a_size_(size1), b_size_(size2)
{
}
std::vector<int>::const_reference
operator[](int index) const
{return at(index);}
std::vector<int>::reference
operator[](int index)
{return at(index);}
std::vector<int>::reference
at(long long index)
{
//check_index_against_bound(index);
long long i = index + offset();
return vector<int>::operator[](i);
}
std::vector<int>::const_reference
at(long long index) const
{
check_index_against_bound(index);
long long i = offset() + index;
return static_cast<const vector<int>* >(this)->at(i);
}
unsigned
a_size() const
{return a_size_;}
unsigned
b_size() const
{return b_size_;}
unsigned
max_d() const
{return a_size_ + b_size_;}
unsigned
offset() const
{return max_d() + abs((long long) a_size() - (long long) b_size());}
}; // end class d_path_vec
/// The abstration of an insertion of elements of a sequence B into a
/// sequence A. This is used to represent the edit script for
/// transforming a sequence A into a sequence B.
///
/// And insertion mainly encapsulates two components:
///
/// - An insertion point: this is the index (starting at 0) of the
/// element of the sequence A after which the insertion occurs.
///
/// - Inserted elements: this is a vector of indexes of elements of
/// sequence B (starting at 0) that got inserted into sequence A,
/// after the insertion point.
class insertion
{
int insertion_point_;
vector<unsigned> inserted_;
public:
insertion(int insertion_point,
const vector<unsigned>& inserted_indexes)
: insertion_point_(insertion_point),
inserted_(inserted_indexes)
{}
insertion(int insertion_point = 0)
: insertion_point_(insertion_point)
{}
int
insertion_point_index() const
{return insertion_point_;}
void
insertion_point_index(int i)
{insertion_point_ = i;}
const vector<unsigned>&
inserted_indexes() const
{return inserted_;}
vector<unsigned>&
inserted_indexes()
{return inserted_;}
};// end class insertion
/// The abstraction of the deletion of one element of a sequence A.
///
/// This encapsulates the index of the element A that got deleted.
class deletion
{
int index_;
public:
deletion(int i)
: index_(i)
{}
int
index() const
{return index_;}
void
index(int i)
{index_ = i;}
};// end class deletion
/// The abstraction of an edit script for transforming a sequence A
/// into a sequence B.
///
/// It encapsulates the insertions and deletions for transforming A
/// into B.
class edit_script
{
vector<insertion> insertions_;
vector<deletion> deletions_;
public:
edit_script()
{}
const vector<insertion>&
insertions() const
{return insertions_;}
vector<insertion>&
insertions()
{return insertions_;}
const vector<deletion>&
deletions() const
{return deletions_;}
vector<deletion>&
deletions()
{return deletions_;}
void
append(const edit_script& es)
{
insertions().insert(insertions().end(),
es.insertions().begin(),
es.insertions().end());
deletions().insert(deletions().end(),
es.deletions().begin(),
es.deletions().end());
}
void
prepend(const edit_script& es)
{
insertions().insert(insertions().begin(),
es.insertions().begin(),
es.insertions().end());
deletions().insert(deletions().begin(),
es.deletions().begin(),
es.deletions().end());
}
void
clear()
{
insertions().clear();
deletions().clear();
}
bool
is_empty() const
{return insertions().empty() && deletions().empty();}
operator bool() const
{return !is_empty();}
int
num_insertions() const
{
int l = 0;
for (vector<insertion>::const_iterator i = insertions().begin();
i != insertions().end();
++i)
l += i->inserted_indexes().size();
return l;
}
int
num_deletions() const
{return deletions().size();}
int
length() const
{return num_insertions() + num_deletions();}
};//end class edit_script
bool
point_is_valid_in_graph(point& p,
unsigned a_size,
unsigned b_size);
bool
ends_of_furthest_d_paths_overlap(const point& forward_d_path_end,
const point& reverse_d_path_end);
/// The default equality functor used by the core diffing algorithms.
struct default_eq_functor
{
/// This equality operator uses the default "==" to compare its
/// arguments.
///
/// @param a the first comparison argument.
///
/// @param b the second comparison argument.
///
/// @return true if the two arguments are equal, false otherwise.
template<typename T>
bool
operator()(const T a, const T b) const
{return a == b;}
};
/// An equality functor to deeply compare pointers.
struct deep_ptr_eq_functor
{
/// This equality operator compares pointers by comparing the
/// pointed-to objects.
///
/// @param first the first comparison argument.
///
/// @param second the second comparison argument.
///
/// @return true if the objects pointed to by the pointers are
/// equal, false otherwise.
template<typename T>
bool
operator()(const T* first,
const T* second) const
{
if (!!first != !!second)
return false;
if (!first)
return true;
return *first == *second;
}
/// This equality operator compares pointers by comparing the
/// pointed-to objects.
///
/// @param first the first comparison argument.
///
/// @param second the second comparison argument.
///
/// @return true if the objects pointed to by the pointers are
/// equal, false otherwise.
template<typename T>
bool
operator()(const shared_ptr<T> first,
const shared_ptr<T> second) const
{return operator()(first.get(), second.get());}
/// This equality operator compares pointers by comparing the
/// pointed-to objects.
///
/// @param first the first comparison argument.
///
/// @param second the second comparison argument.
///
/// @return true if the objects pointed to by the pointers are
/// equal, false otherwise.
template<typename T>
bool
operator()(const weak_ptr<T> first,
const weak_ptr<T> second) const
{return operator()(shared_ptr<T>(first), shared_ptr<T>(second));}
}; // end struct deep_ptr_eq_functor
/// Find the end of the furthest reaching d-path on diagonal k, for
/// two sequences. In the paper This is referred to as "the basic
/// algorithm".
///
/// Unlike in the paper, the coordinates of the edit graph start at
/// (-1,-1), rather than (0,0), and they end at (M-1, N-1), rather
/// than (M,N).
///
/// @tparm RandomAccessOutputIterator the type of iterators passed to
/// this function. It must be a random access output iterator kind.
///
/// @tparm EqualityFunctor this must be a class that declares a public
/// call operator member returning a boolean and taking two arguments
/// that must be of the same type as the one pointed to by the @ref
/// RandomAccessOutputIterator template parameter. This functor is
/// used to compare the elements referred to by the iterators pased in
/// argument to this function.
///
/// @param k the number of the diagonal on which we want to find the
/// end of the furthest reaching D-path.
///
/// @param d the D in D-Path. That's the number of insertions/deletions
/// (the number of changes, in other words) in the changeset. That is
/// also the number of non-diagonals in the D-Path.
///
/// @param a_begin an iterator to the beginning of the first sequence
///
/// @param a_end an iterator that points right after the last element
/// of the second sequence to consider.
///
/// @param b_begin an iterator to the beginning of the second sequence.
///
/// @param b_end an iterator that points right after the last element
/// of the second sequence to consider.
///
/// @param v the vector of furthest end points of d_paths, at (d-1).
/// It contains the abscissas of the furthest end points for different
/// values of k, at (d-1). That is, for k in [-D + 1, -D + 3, -D + 5,
/// ..., D - 1], v[k] is the abscissa of the end of the furthest
/// reaching (D-1)-path on diagonal k.
///
/// @param snak the last snake of the furthest path found. The end
/// point of the snake is the end point of the furthest path.
///
/// @return true if the end of the furthest reaching path that was
/// found was inside the boundaries of the edit graph, false
/// otherwise.
template<typename RandomAccessOutputIterator,
typename EqualityFunctor>
bool
end_of_fr_d_path_in_k(int k, int d,
RandomAccessOutputIterator a_begin,
RandomAccessOutputIterator a_end,
RandomAccessOutputIterator b_start,
RandomAccessOutputIterator b_end,
d_path_vec& v, snake& snak)
{
int x = -1, y = -1;
point begin, intermediate, diag_start, end;
snake s;
EqualityFunctor eq;
// Let's pick the end point of the furthest reaching
// (D-1)-path. It's either v[k-1] or v[k+1]; the word
// "furthest" means we choose the one which abscissa is the
// greatest (that is, furthest from abscissa zero).
if (k == -d || ((k != d) && (v[k-1] < v[k + 1])))
// So, the abscissa of the end point of the furthest
// reaching (D-1)-path is v[k+1]. That is a diagonal that
// is above the current (k) diagonal, and on the right.
// To move to the current k diagonal, one has to move
// "down" from the diagonal k+1. So the abscissa won't
// change. Only the ordinate will. It will be given by y
// = x - k (a bit below); as k has changed from k - 1 (it
// has increased), y is going to be the new y that is
// 'down' from the previous y in k - 1.
{
x = v[k+1];
begin.set(x, x - (k + 1));
}
else
{
// So the abscissa of the end point of the furthest
// (D-1)-path is v[k-1]. That is on the left of the
// current k diagonal. To move to the current k diagonal,
// one has to move "right" from diagonal k - 1. That is,
// the y stays constant and x is incremented.
x = v[k-1];
begin.set(x, x - (k - 1));
++x;
}
// Now get the value of y from the equation k = x -y.
// This is the point where we first touch K, when we move
// from the end of the furthest reaching (D-1)-path.
y = x - k;
intermediate.x(x);
intermediate.y(y);
int last_x_index = a_end - a_begin - 1;
int last_y_index = b_end - b_start - 1;
// Now, follow the snake (aka, zero or more consecutive
// diagonals). Note that we stay on the k diagonal when we
// do this.
while ((x < last_x_index) && (y < last_y_index))
if (eq(a_begin[x + 1], b_start[y + 1]))
{
x = x + 1;
y = y + 1;
if (!diag_start)
diag_start.set(x, y);
}
else
break;
end.x(x);
end.y(y);
// Note the point that we store in v here might be outside the
// bounds of the edit graph. But we store it at this step (for a
// given D) anyway, because out of bound or not, we need this value
// at this step to be able to compute the value of the point on the
// "next" diagonal for the next D.
v[k] = x;
if (x >= (int) v.a_size()
|| y >= (int) v.b_size()
|| x < -1 || y < -1)
return false;
s.set(begin, intermediate, diag_start, end);
s.set_forward(true);
snak = s;
return true;
}
/// Find the end of the furthest reaching reverse d-path on diagonal k
/// + delta. Delta is abs(M - N), with M being the size of a and N
/// being the size of b. This is the "basic algorithm", run backward.
/// That is, starting from the point (M,N) of the edit graph.
///
/// Unlike in the paper, the coordinates of the edit graph start at
/// (-1,-1), rather than (0,0), and they end at (M-1, N-1), rather
/// than (M,N).
///
/// @tparm RandomAccessOutputIterator the type of iterators passed to
/// this function. It must be a random access output iterator kind.
///
/// @tparm EqualityFunctor this must be a class that declares a public
/// call operator member returning a boolean and taking two arguments
/// that must be of the same type as the one pointed to by the @ref
/// RandomAccessOutputIterator template parameter. This functor is
/// used to compare the elements referred to by the iterators pased in
/// argument to this function.
///
/// @param k the number of the diagonal on which we want to find the
/// end of the furthest reaching reverse D-path. Actually, we want to
/// find the end of the furthest reaching reverse D-path on diagonal (k
/// - delta).
///
/// @param d the D in D-path. That's the number of insertions/deletions
/// (the number of changes, in other words) in the changeset. That is
/// also the number of non-diagonals in the D-Path.
///
/// @param a_begin an iterator to the beginning of the first sequence
///
/// @param a_end an iterator that points right after the last element
/// of the second sequence to consider.
///
/// @param b_begin an iterator to the beginning of the second sequence.
///
/// @param b_end an iterator that points right after the last element
/// of the second sequence to consider.
///
/// @param v the vector of furthest end points of d_paths, at (d-1).
/// It contains the abscissae of the furthest end points for different
/// values of k - delta, at (d-1). That is, for k in [-D + 1, -D + 3,
/// -D + 5, ..., D - 1], v[k - delta] is the abscissa of the end of the
/// furthest reaching (D-1)-path on diagonal k - delta.
///
/// @param snak the last snake of the furthest path found. The end
/// point of the snake is the end point of the furthest path.
///
/// @return true iff the end of the furthest reaching path that was
/// found was inside the boundaries of the edit graph, false
/// otherwise.
template<typename RandomAccessOutputIterator,
typename EqualityFunctor>
bool
end_of_frr_d_path_in_k_plus_delta (int k, int d,
RandomAccessOutputIterator a_begin,
RandomAccessOutputIterator a_end,
RandomAccessOutputIterator b_begin,
RandomAccessOutputIterator b_end,
d_path_vec& v, snake& snak)
{
int a_size = a_end - a_begin;
int b_size = b_end - b_begin;
int delta = a_size - b_size;
int k_plus_delta = k + delta;
int x = -1, y = -1;
point begin, intermediate, diag_start, end;
snake s;
EqualityFunctor eq;
// Let's pick the end point of the furthest reaching (D-1)-path and
// move from there to reach the current k_plus_delta-line. That end
// point of the furthest reaching (D-1)-path is either on
// v[k_plus_delta-1] or on v[k_plus_delta+1]; the word "furthest"
// means we choose the one which abscissa is the lowest (that is,
// furthest from abscissa M).
if (k_plus_delta == -d + delta
|| ((k_plus_delta != d + delta)
&& (v[k_plus_delta + 1] <= v[k_plus_delta - 1])))
{
// We move left, that means ordinate won't change ...
x = v[k_plus_delta + 1];
y = x - (k_plus_delta + 1);
begin.set(x, y);
// ... and abscissa decreases.
x = x - 1;
}
else
{
// So the furthest end point is on the k_plus_delta - 1
// diagonal. That is a diagonal that is 'below' the
// k_plus_delta current diagonal. So to join the current
// diagonal from the k_plus_delta - 1 one, we need to move up.
// So moving up means abscissa won't change ...
x = v[k_plus_delta - 1];
begin.set(x, x - (k_plus_delta - 1));
// ... and that ordinate decreases.
y = x - (k_plus_delta - 1) - 1;
}
intermediate.set(x, y);
// Now, follow the snake. Note that we stay on the k_plus_delta
// diagonal when we do this.
while (x >= 0 && y >= 0)
if (eq(a_begin[x], b_begin[y]))
{
if (!diag_start)
diag_start.set(x, y);
x = x - 1;
y = y - 1;
}
else
break;
end.set(x, y);
// Note the point that we store in v here might be outside the
// bounds of the edit graph. But we store it at this step (for a
// given D) anyway, because out of bound or not, we need this value
// at this step to be able to compute the value of the point on the
// "next" diagonal for the next D.
v[k_plus_delta] = x;
if (x == -1 && y == -1)
;
else if (x < -1 || y < -1)
return false;
s.set(begin, intermediate, diag_start, end);
s.set_forward(false);
snak = s;
return true;
}
/// Tests if a given point is a match point in an edit graph.
///
/// @param a_begin the begin iterator of the first input sequence of
/// the edit graph.
///
/// @param a_end the end iterator of the first input sequence of the
/// edit graph. This points to one element passed the end of the
/// sequence.
///
/// @param b_begin the begin iterator of the second input sequence of
/// the edit graph.
///
/// @param b_end the end iterator of the second input sequence of the
/// edit graph. This points the one element passed the end of the
/// sequence.
///
/// @param point the point to test for being a match point.
///
/// @return true iff \a point is a match point.
template<typename RandomAccessOutputIterator>
bool
is_match_point(RandomAccessOutputIterator a_begin,
RandomAccessOutputIterator a_end,
RandomAccessOutputIterator b_begin,
RandomAccessOutputIterator b_end,
const point& point)
{
int a_size = a_end - a_begin, b_size = b_end - b_begin;
if (point.x() < 0
|| point.x () >= a_size
|| point.y() < 0
|| point.y() >= b_size)
return false;
return (a_begin[point.x()] == b_begin[point.y()]);
}
/// Returns the middle snake of two sequences A and B, as well as the
/// length of their shortest editing script.
///
/// This uses the "linear space refinement" algorithm presented in
/// section 4b in the paper. As the paper says, "The idea for doing
/// so is to simultaneously run the basic algorithm in both the
/// forward and reverse directions until furthest reaching forward and
/// reverse paths starting at opposing corners 'overlap'."
///
/// @tparm RandomAccessOutputIterator the type of iterators passed to
/// this function. It must be a random access output iterator kind.
///
/// @tparm EqualityFunctor this must be a class that declares a public
/// call operator member returning a boolean and taking two arguments
/// that must be of the same type as the one pointed to by the @ref
/// RandomAccessOutputIterator template parameter. This functor is
/// used to compare the elements referred to by the iterators pased in
/// argument to this function.
///
/// @param a_begin an iterator pointing to the begining of sequence A.
///
/// @param a_end an iterator pointing to the end of sequence A. Note
/// that this points right /after/ the end of vector A.
///
/// @param b_begin an iterator pointing to the begining of sequence B.
///
/// @param b_end an iterator pointing to the end of sequence B. Note
/// that this points right /after/ the end of vector B
///
/// @param snak out parameter. This is the snake current when the two
/// paths overlapped. This is set iff the function returns true;
/// otherwise, this is not touched.
///
/// @return true is the snake was found, false otherwise.
template<typename RandomAccessOutputIterator,
typename EqualityFunctor>
bool
compute_middle_snake(RandomAccessOutputIterator a_begin,
RandomAccessOutputIterator a_end,
RandomAccessOutputIterator b_begin,
RandomAccessOutputIterator b_end,
snake& snak, int& ses_len)
{
int a_size = a_end - a_begin;
int N = a_size;
int b_size = b_end - b_begin;
int M = b_size;
int delta = N - M;
d_path_vec forward_d_paths(a_size, b_size);
d_path_vec reverse_d_paths(a_size, b_size);
// These points below are the top leftmost point and bottom
// right-most points of the edit graph.
point first_point(-1, -1), last_point(a_size -1, b_size -1), point_zero(0, 0);
// We want the initial step (D = 0, k = 0 in the paper) to find a
// furthest reaching point on diagonal k == 0; For that, we need the
// value of x for k == 1; So let's set that value to -1; that is for
// k == 1 (diagonal 1), the point in the edit graph is (-1,-2).
// That way, to get the furthest reaching point on diagonal 0 (k ==
// 0), we go down from (-1,-2) on diagonal 1 and we hit diagonal 0
// on (-1,-1); that is the starting value that the algorithm expects
// for k == 0.
forward_d_paths[1] = -1;
// Similarly for the reverse paths, for diagonal delta + 1 (note
// that diagonals are centered on delta, unlike for forward paths
// where they are centered on zero), we set the initial point to
// (a_size, b_size - 1). That way, at step D == 0 and k == delta,
// to reach diagonal delta from the point (a_size, b_size - 1) on
// diagonal delta + 1, we just have to move left, and we hit
// diagonal delta on (a_size - 1, b_size -1); that is the starting
// point value the algorithm expects for k == 0 in the reverse case.
reverse_d_paths[delta + 1] = a_size;
int d_max = (M + N) / 2 + 1;
for (int d = 0; d <= d_max; ++d)
{
// First build forward paths.
for (int k = -d; k <= d; k += 2)
{
snake s;
bool found =
end_of_fr_d_path_in_k<RandomAccessOutputIterator,
EqualityFunctor>(k, d,
a_begin, a_end,
b_begin, b_end,
forward_d_paths, s);
if (!found)
continue;
// As the paper says in 4b while explaining the middle snake
// algorithm:
//
// "Thus when delta is odd, check for overlap only while
// extending forward paths ..."
if ((delta % 2)
&& (k >= (delta - (d - 1))) && (k <= (delta + (d - 1))))
{
point reverse_end;
reverse_end.x(reverse_d_paths[k]);
reverse_end.y(reverse_end.x() - k);
if (ends_of_furthest_d_paths_overlap(s.end(), reverse_end))
{
ses_len = 2 * d - 1;
snak = s;
return true;
}
}
}
// Now build reverse paths.
for (int k = -d; k <= d; k += 2)
{
snake s;
bool found =
end_of_frr_d_path_in_k_plus_delta<RandomAccessOutputIterator,
EqualityFunctor>(k, d,
a_begin, a_end,
b_begin, b_end,
reverse_d_paths,
s);
if (!found)
continue;
// And the paper continues by saying:
//
// "... and when delta is even, check for overlap only while
// extending reverse paths."
int k_plus_delta = k + delta;
if (!(delta % 2)
&& (k_plus_delta >= -d) && (k_plus_delta <= d))
{
point forward_end;
forward_end.x(forward_d_paths[k_plus_delta]);
forward_end.y(forward_end.x() - k_plus_delta);
if (ends_of_furthest_d_paths_overlap(forward_end, s.end()))
{
ses_len = 2 * d;
snak = s;
return true;
}
}
}
}
return false;
}
bool
compute_middle_snake(const char* str1, const char* str2,
snake& s, int& ses_len);
/// This prints the middle snake of two strings.
///
/// @param a_begin the beginning of the first string.
///
/// @param b_begin the beginning of the second string.
///
/// @param s the snake to print.
///
/// @param out the output stream to print the snake to.
template<typename RandomAccessOutputIterator>
void
print_snake(RandomAccessOutputIterator a_begin,
RandomAccessOutputIterator b_begin,
const snake &s, ostream& out)
{
if (s.is_empty())
return;
out << "snake start: ";
out << "(" << s.begin().x() << ", " << s.end().y() << ")\n";
out << "snake intermediate: ";
out << "(" << s.intermediate().x() << ", " << s.intermediate().y() << ")\n";
out << "diagonal point(s): ";
if (s.has_diagonal_edge())
for (int x = s.intermediate().x(), y = s.intermediate().y();
x <= s.end().x() && y <= s.end().y();
++x, ++y)
{
ABG_ASSERT(a_begin[x] == b_begin[y]);
out << "(" << x << "," << y << ") ";
}
out << "\n";
out << "snake end: ";
out << "(" << s.end().x() << ", " << s.end().y() << ")\n";
}
/// Compute the length of the shortest edit script for two sequences a
/// and b. This is done using the "Greedy LCS/SES" of figure 2 in the
/// paper. It can walk the edit graph either foward (when reverse is
/// false) or backward starting from the end (when reverse is true).
///
/// Here, note that the real content of a and b should start at index
/// 1, for this implementatikon algorithm to match the paper's
/// algorithm in a straightforward manner. So pleast make sure that
/// at index 0, we just get some non-used value.
///
/// @tparm RandomAccessOutputIterator the type of iterators passed to
/// this function. It must be a random access output iterator kind.
///
/// @tparm EqualityFunctor this must be a class that declares a public
/// call operator member returning a boolean and taking two arguments
/// that must be of the same type as the one pointed to by the @ref
/// RandomAccessOutputIterator template parameter. This functor is
/// used to compare the elements referred to by the iterators pased in
/// argument to this function.
///
/// @param a the first sequence we care about.
///
/// @param b the second sequence we care about.
///
/// @param v the vector that contains the end points of the furthest
/// reaching d-path and (d-1)-path.
template<typename RandomAccessOutputIterator,
typename EqualityFunctor>
int
ses_len(RandomAccessOutputIterator a_begin,
RandomAccessOutputIterator a_end,
RandomAccessOutputIterator b_begin,
RandomAccessOutputIterator b_end,
d_path_vec& v, bool reverse)
{
unsigned a_size = a_end - a_begin;
unsigned b_size = b_end - b_begin;
snake snak;
ABG_ASSERT(v.max_d() == a_size + b_size);
int delta = a_size - b_size;
if (reverse)
// Set a fictitious (M, N-1) into v[1], to find the furthest
// reaching reverse 0-path (i.e, when we are at d == 0 and k == 0).
v[delta + 1] = a_size - 1;
else
// Set a fictitious (-1,-2) point into v[1], to find the furthest
// reaching forward 0-path (i.e, when we are at d == 0 and k == 0).
v[1] = -1;
for (unsigned d = 0; d <= v.max_d(); ++d)
{
for (int k = -d; k <= (int) d; k += 2)
{
point end;
if (reverse)
{
bool found =
end_of_frr_d_path_in_k_plus_delta<RandomAccessOutputIterator,
EqualityFunctor>(k, d,
a_begin, a_end,
b_begin, b_end,
v, snak);
// If we reached the upper left corner of the edit graph then
// we are done.
if (found && snak.end().x() == -1 && snak.end().y() == -1)
return d;
}
else
{
end_of_fr_d_path_in_k<RandomAccessOutputIterator,
EqualityFunctor>(k, d,
a_begin, a_end,
b_begin, b_end,
v, snak);
// If we reached the lower right corner of the edit
// graph then we are done.
if ((snak.end().x() == (int) a_size - 1)
&& (snak.end().y() == (int) b_size - 1))
return d;
}
}
}
return 0;
}
/// Compute the length of the shortest edit script for two sequences a
/// and b. This is done using the "Greedy LCS/SES" of figure 2 in the
/// paper. It can walk the edit graph either foward (when reverse is
/// false) or backward starting from the end (when reverse is true).
///
/// Here, note that the real content of a and b should start at index
/// 1, for this implementatikon algorithm to match the paper's
/// algorithm in a straightforward manner. So pleast make sure that
/// at index 0, we just get some non-used value.
///
/// Note that the equality operator used to compare the elements
/// passed in argument to this function is the default "==" operator.
///
/// @tparm RandomAccessOutputIterator the type of iterators passed to
/// this function. It must be a random access output iterator kind.
///
/// @param a the first sequence we care about.
///
/// @param b the second sequence we care about.
///
/// @param v the vector that contains the end points of the furthest
/// reaching d-path and (d-1)-path.
template<typename RandomAccessOutputIterator>
int
ses_len(RandomAccessOutputIterator a_begin,
RandomAccessOutputIterator a_end,
RandomAccessOutputIterator b_begin,
RandomAccessOutputIterator b_end,
d_path_vec& v, bool reverse)
{
return ses_len<RandomAccessOutputIterator, default_eq_functor>(a_begin, a_end,
b_begin, b_end,
v, reverse);
}
int
ses_len(const char* str1,
const char* str2,
bool reverse = false);
bool
snake_end_points(const snake& s, point&, point&);
/// Compute the longest common subsequence of two (sub-regions of)
/// sequences as well as the shortest edit script from transforming
/// the first (sub-region of) sequence into the second (sub-region of)
/// sequence.
///
/// A sequence is determined by a base, a beginning offset and an end
/// offset. The base always points to the container that contains the
/// sequence to consider. The beginning offset is an iterator that
/// points the beginning of the sub-region of the sequence that we
/// actually want to consider. The end offset is an iterator that
/// points to the end of the sub-region of the sequence that we
/// actually want to consider.
///
/// This uses the LCS algorithm of the paper at section 4b.
///
/// @tparm RandomAccessOutputIterator the type of iterators passed to
/// this function. It must be a random access output iterator kind.
///
/// @tparm EqualityFunctor this must be a class that declares a public
/// call operator member returning a boolean and taking two arguments
/// that must be of the same type as the one pointed to by the @ref
/// RandomAccessOutputIterator template parameter. This functor is
/// used to compare the elements referred to by the iterators pased in
/// argument to this function.
///
/// @param a_base the iterator to the base of the first sequence.
///
/// @param a_start an iterator to the beginning of the sub-region
/// of the first sequence to actually consider.
///
/// @param a_end an iterator to the end of the sub-region of the first
/// sequence to consider.
///
///@param b_base an iterator to the base of the second sequence to
///consider.
///
/// @param b_start an iterator to the beginning of the sub-region
/// of the second sequence to actually consider.
///
/// @param b_end an iterator to the end of the sub-region of the
/// second sequence to actually consider.
///
/// @param lcs the resulting lcs. This is set iff the function
/// returns true.
///
/// @param ses the resulting shortest editing script.
///
/// @param ses_len the length of the ses above. Normally this can be
/// retrieved from ses.length(), but this parameter is here for sanity
/// check purposes. The function computes the length of the ses in
/// two redundant ways and ensures that both methods lead to the same
/// result.
///
/// @return true upon successful completion, false otherwise.
template<typename RandomAccessOutputIterator,
typename EqualityFunctor>
void
compute_diff(RandomAccessOutputIterator a_base,
RandomAccessOutputIterator a_begin,
RandomAccessOutputIterator a_end,
RandomAccessOutputIterator b_base,
RandomAccessOutputIterator b_begin,
RandomAccessOutputIterator b_end,
vector<point>& lcs,
edit_script& ses,
int& ses_len)
{
int a_size = a_end - a_begin;
int b_size = b_end - b_begin;
unsigned a_offset = a_begin - a_base, b_offset = b_begin - b_base;
if (a_size == 0 || b_size == 0)
{
if (a_size > 0 && b_size == 0)
// All elements of the first sequences have been deleted. So add
// the relevant deletions to the edit script.
for (RandomAccessOutputIterator i = a_begin; i < a_end; ++i)
ses.deletions().push_back(deletion(i - a_base));
if (b_size > 0 && a_size == 0)
{
// All elements present in the second sequence are part of
// an insertion into the first sequence at a_end. So add
// that insertion to the edit script.
int a_full_size = a_end - a_base;
int insertion_index = a_full_size ? a_full_size - 1 : -1;
insertion ins(insertion_index);
for (RandomAccessOutputIterator i = b_begin; i < b_end; ++i)
ins.inserted_indexes().push_back(i - b_base);
ses.insertions().push_back(ins);
}
ses_len = a_size + b_size;
return;
}
int d = 0;
snake snak;
vector<point> trace; // the trace of the edit graph. Read the paper
// to understand what a trace is.
bool has_snake =
compute_middle_snake<RandomAccessOutputIterator,
EqualityFunctor>(a_begin, a_end,
b_begin, b_end,
snak, d);
if (has_snake)
{
// So middle_{begin,end} are expressed wrt a_begin and b_begin.
// Let's express them wrt a_base and b_base.
snak.add(a_offset, b_offset);
ses_len = d;
}
if (has_snake)
{
if ( snak.has_diagonal_edge())
for (int x = snak.diagonal_start().x(), y = snak.diagonal_start().y();
x <= snak.end().x() && y <= snak.end().y();
++x, ++y)
{
point p(x, y);
trace.push_back(p);
}
}
else
{
// So there is no middle snake. That means there is no lcs, so
// the two sequences are different.
// In other words, all the elements of the first sequence have
// been deleted ...
for (RandomAccessOutputIterator i = a_begin; i < a_end; ++i)
ses.deletions().push_back(deletion(i - a_base));
// ... and all the elements of the second sequence are insertions
// that happen at the beginning of the first sequence.
insertion ins(a_begin - a_base);
for (RandomAccessOutputIterator i = b_begin; i < b_end; ++i)
ins.inserted_indexes().push_back(i - b_base);
ses.insertions().push_back(ins);
ses_len = a_size + b_size;
ABG_ASSERT(ses_len == ses.length());
return;
}
if (d > 1)
{
int tmp_ses_len0 = 0;
edit_script tmp_ses0;
point px, pu;
snake_end_points(snak, px, pu);
compute_diff<RandomAccessOutputIterator,
EqualityFunctor>(a_base, a_begin, a_base + (px.x() + 1),
b_base, b_begin, b_base + (px.y() + 1),
lcs, tmp_ses0, tmp_ses_len0);
lcs.insert(lcs.end(), trace.begin(), trace.end());
int tmp_ses_len1 = 0;
edit_script tmp_ses1;
compute_diff<RandomAccessOutputIterator,
EqualityFunctor>(a_base, a_base + (pu.x() + 1), a_end,
b_base, b_base + (pu.y() + 1), b_end,
lcs, tmp_ses1, tmp_ses_len1);
ABG_ASSERT(tmp_ses0.length() + tmp_ses1.length() == d);
ABG_ASSERT(tmp_ses_len0 + tmp_ses_len1 == d);
ses.append(tmp_ses0);
ses.append(tmp_ses1);
}
else if (d == 1)
{
if (snak.has_diagonal_edge())
{
for (int x = snak.diagonal_start().x(), y = snak.diagonal_start().y();
x <= snak.end().x() && y <= snak.end().y();
++x, ++y)
{
point p(x, y);
trace.push_back(p);
}
}
if (snak.has_vertical_edge())
{
point p = snak.intermediate();
insertion ins(p.x());
ins.inserted_indexes().push_back(p.y());
ses.insertions().push_back(ins);
}
else if (snak.has_horizontal_edge())
{
if (snak.is_forward())
{
deletion del(snak.intermediate().x());
ses.deletions().push_back(del);
}
else
{
deletion del(snak.begin().x());
ses.deletions().push_back(del);
}
}
}
else if (d == 0)
{
// Obviously on the middle snake is part of the solution, as
// there is no edit script; iow, the two sequences are
// identical.
lcs.insert(lcs.end(), trace.begin(), trace.end());
ses_len = 0;
}
ABG_ASSERT(ses_len == ses.length());
}
/// Compute the longest common subsequence of two (sub-regions of)
/// sequences as well as the shortest edit script from transforming
/// the first (sub-region of) sequence into the second (sub-region of)
/// sequence.
///
/// This uses the LCS algorithm of the paper at section 4b.
///
/// @tparm RandomAccessOutputIterator the type of iterators passed to
/// this function. It must be a random access output iterator kind.
///
/// @tparm EqualityFunctor this must be a class that declares a public
/// call operator member returning a boolean and taking two arguments
/// that must be of the same type as the one pointed to by the @ref
/// RandomAccessOutputIterator template parameter. This functor is
/// used to compare the elements referred to by the iterators pased in
/// argument to this function.
///
/// @param a_start an iterator to the beginning of the first sequence
/// to consider.
///
/// @param a_end an iterator to the end of the first sequence to
/// consider.
///
/// @param b_start an iterator to the beginning of the second sequence
/// to consider.
///
/// @param b_end an iterator to the end of the second sequence to
/// consider.
///
/// @param lcs the resulting lcs. This is set iff the function
/// returns true.
///
/// @param ses the resulting shortest editing script.
///
/// @param ses_len the length of the ses above. Normally this can be
/// retrieved from ses.length(), but this parameter is here for sanity
/// check purposes. The function computes the length of the ses in
/// two redundant ways and ensures that both methods lead to the same
/// result.
///
/// @return true upon successful completion, false otherwise.
template<typename RandomAccessOutputIterator,
typename EqualityFunctor>
void
compute_diff(RandomAccessOutputIterator a_begin,
RandomAccessOutputIterator a_end,
RandomAccessOutputIterator b_begin,
RandomAccessOutputIterator b_end,
vector<point>& lcs,
edit_script& ses,
int& ses_len)
{
compute_diff<RandomAccessOutputIterator,
EqualityFunctor>(a_begin, a_begin, a_end,
b_begin, b_begin, b_end,
lcs, ses, ses_len);
}
/// Compute the longest common subsequence of two (sub-regions of)
/// sequences as well as the shortest edit script from transforming
/// the first (sub-region of) sequence into the second (sub-region of)
/// sequence.
///
/// A sequence is determined by a base, a beginning offset and an end
/// offset. The base always points to the container that contains the
/// sequence to consider. The beginning offset is an iterator that
/// points the beginning of the sub-region of the sequence that we
/// actually want to consider. The end offset is an iterator that
/// points to the end of the sub-region of the sequence that we
/// actually want to consider.
///
/// This uses the LCS algorithm of the paper at section 4b.
///
/// @tparm RandomAccessOutputIterator the type of iterators passed to
/// this function. It must be a random access output iterator kind.
///
/// @tparm EqualityFunctor this must be a class that declares a public
/// call operator member returning a boolean and taking two arguments
/// that must be of the same type as the one pointed to by the @ref
/// RandomAccessOutputIterator template parameter. This functor is
/// used to compare the elements referred to by the iterators pased in
/// argument to this function.
///
/// @param a_base the iterator to the base of the first sequence.
///
/// @param a_start an iterator to the beginning of the sub-region
/// of the first sequence to actually consider.
///
/// @param a_end an iterator to the end of the sub-region of the first
/// sequence to consider.
///
///@param b_base an iterator to the base of the second sequence to
///consider.
///
/// @param b_start an iterator to the beginning of the sub-region
/// of the second sequence to actually consider.
///
/// @param b_end an iterator to the end of the sub-region of the
/// second sequence to actually consider.
///
/// @param lcs the resulting lcs. This is set iff the function
/// returns true.
///
/// @param ses the resulting shortest editing script.
///
/// @return true upon successful completion, false otherwise.
template<typename RandomAccessOutputIterator,
typename EqualityFunctor>
void
compute_diff(RandomAccessOutputIterator a_base,
RandomAccessOutputIterator a_begin,
RandomAccessOutputIterator a_end,
RandomAccessOutputIterator b_base,
RandomAccessOutputIterator b_begin,
RandomAccessOutputIterator b_end,
vector<point>& lcs,
edit_script& ses)
{
int ses_len = 0;
compute_diff<RandomAccessOutputIterator,
EqualityFunctor>(a_base, a_begin, a_end,
b_base, b_begin, b_end,
lcs, ses, ses_len);
}
/// Compute the longest common subsequence of two (sub-regions of)
/// sequences as well as the shortest edit script from transforming
/// the first (sub-region of) sequence into the second (sub-region of)
/// sequence.
///
/// This uses the LCS algorithm of the paper at section 4b.
///
/// @tparm RandomAccessOutputIterator the type of iterators passed to
/// this function. It must be a random access output iterator kind.
///
/// @tparm EqualityFunctor this must be a class that declares a public
/// call operator member returning a boolean and taking two arguments
/// that must be of the same type as the one pointed to by the @ref
/// RandomAccessOutputIterator template parameter. This functor is
/// used to compare the elements referred to by the iterators pased in
/// argument to this function.
///
/// @param a_start an iterator to the beginning of the first sequence
/// to consider.
///
/// @param a_end an iterator to the end of the first sequence to
/// consider.
///
/// @param b_start an iterator to the beginning of the sequence to
/// actually consider.
///
/// @param b_end an iterator to the end of second sequence to
/// consider.
///
/// @param lcs the resulting lcs. This is set iff the function
/// returns true.
///
/// @param ses the resulting shortest editing script.
///
/// @return true upon successful completion, false otherwise.
template<typename RandomAccessOutputIterator,
typename EqualityFunctor>
void
compute_diff(RandomAccessOutputIterator a_begin,
RandomAccessOutputIterator a_end,
RandomAccessOutputIterator b_begin,
RandomAccessOutputIterator b_end,
vector<point>& lcs,
edit_script& ses)
{
compute_diff<RandomAccessOutputIterator,
EqualityFunctor>(a_begin, a_begin, a_end,
b_begin, b_begin, b_end,
lcs, ses);
}
/// Compute the longest common subsequence of two (sub-regions of)
/// sequences as well as the shortest edit script from transforming
/// the first (sub-region of) sequence into the second (sub-region of)
/// sequence.
///
/// This uses the LCS algorithm of the paper at section 4b.
///
/// @tparm RandomAccessOutputIterator the type of iterators passed to
/// this function. It must be a random access output iterator kind.
///
/// @param a_start an iterator to the beginning of the first sequence
/// to consider.
///
/// @param a_end an iterator to the end of the first sequence to
/// consider.
///
/// @param b_start an iterator to the beginning of the sequence to
/// actually consider.
///
/// @param b_end an iterator to the end of second sequence to
/// consider.
///
/// @param lcs the resulting lcs. This is set iff the function
/// returns true.
///
/// @param ses the resulting shortest editing script.
///
/// @return true upon successful completion, false otherwise.
template<typename RandomAccessOutputIterator>
void
compute_diff(RandomAccessOutputIterator a_begin,
RandomAccessOutputIterator a_end,
RandomAccessOutputIterator b_begin,
RandomAccessOutputIterator b_end,
vector<point>& lcs,
edit_script& ses)
{
compute_diff<RandomAccessOutputIterator,
default_eq_functor>(a_begin, a_end, b_begin, b_end, lcs, ses);
}
/// Compute the longest common subsequence of two (sub-regions of)
/// sequences as well as the shortest edit script from transforming
/// the first (sub-region of) sequence into the second (sub-region of)
/// sequence.
///
/// A sequence is determined by a base, a beginning offset and an end
/// offset. The base always points to the container that contains the
/// sequence to consider. The beginning offset is an iterator that
/// points the beginning of the sub-region of the sequence that we
/// actually want to consider. The end offset is an iterator that
/// points to the end of the sub-region of the sequence that we
/// actually want to consider.
///
/// This uses the LCS algorithm of the paper at section 4b.
///
/// @tparm RandomAccessOutputIterator the type of iterators passed to
/// this function. It must be a random access output iterator kind.
///
/// @tparm EqualityFunctor this must be a class that declares a public
/// call operator member returning a boolean and taking two arguments
/// that must be of the same type as the one pointed to by the @ref
/// RandomAccessOutputIterator template parameter. This functor is
/// used to compare the elements referred to by the iterators pased in
/// argument to this function.
///
/// @param a_base the iterator to the base of the first sequence.
///
/// @param a_start an iterator to the beginning of the sub-region
/// of the first sequence to actually consider.
///
/// @param a_end an iterator to the end of the sub-region of the first
/// sequence to consider.
///
/// @param b_base an iterator to the base of the second sequence to
/// consider.
///
/// @param b_start an iterator to the beginning of the sub-region
/// of the second sequence to actually consider.
///
/// @param b_end an iterator to the end of the sub-region of the
/// second sequence to actually consider.
///
/// @param ses the resulting shortest editing script.
///
/// @return true upon successful completion, false otherwise.
template<typename RandomAccessOutputIterator,
typename EqualityFunctor>
void
compute_diff(RandomAccessOutputIterator a_base,
RandomAccessOutputIterator a_begin,
RandomAccessOutputIterator a_end,
RandomAccessOutputIterator b_base,
RandomAccessOutputIterator b_begin,
RandomAccessOutputIterator b_end,
edit_script& ses)
{
vector<point> lcs;
compute_diff<RandomAccessOutputIterator,
EqualityFunctor>(a_base, a_begin, a_end,
b_base, b_begin, b_end,
lcs, ses);
}
/// Compute the longest common subsequence of two (sub-regions of)
/// sequences as well as the shortest edit script from transforming
/// the first (sub-region of) sequence into the second (sub-region of)
/// sequence.
///
/// This uses the LCS algorithm of the paper at section 4b.
///
/// @tparm RandomAccessOutputIterator the type of iterators passed to
/// this function. It must be a random access output iterator kind.
///
/// @tparm EqualityFunctor this must be a class that declares a public
/// call operator member returning a boolean and taking two arguments
/// that must be of the same type as the one pointed to by the @ref
/// RandomAccessOutputIterator template parameter. This functor is
/// used to compare the elements referred to by the iterators pased in
/// argument to this function.
///
/// @param a_start an iterator to the beginning of the first sequence
/// to consider.
///
/// @param a_end an iterator to the end of the first sequence to
/// consider.
///
/// @param b_start an iterator to the beginning of the second sequence
/// to consider.
///
/// @param b_end an iterator to the end of the second sequence to
/// consider.
///
/// @param ses the resulting shortest editing script.
///
/// @return true upon successful completion, false otherwise.
template<typename RandomAccessOutputIterator,
typename EqualityFunctor>
void
compute_diff(RandomAccessOutputIterator a_begin,
RandomAccessOutputIterator a_end,
RandomAccessOutputIterator b_begin,
RandomAccessOutputIterator b_end,
edit_script& ses)
{
compute_diff<RandomAccessOutputIterator,
EqualityFunctor>(a_begin, a_begin, a_end,
b_begin, b_begin, b_end,
ses);
}
/// Compute the longest common subsequence of two (sub-regions of)
/// sequences as well as the shortest edit script from transforming
/// the first (sub-region of) sequence into the second (sub-region of)
/// sequence.
///
/// This uses the LCS algorithm of the paper at section 4b.
///
/// @tparm RandomAccessOutputIterator the type of iterators passed to
/// this function. It must be a random access output iterator kind.
///
/// @param a_start an iterator to the beginning of the first sequence
/// to consider.
///
/// @param a_end an iterator to the end of the first sequence to
/// consider.
///
/// @param b_start an iterator to the beginning of the second sequence
/// to consider.
///
/// @param b_end an iterator to the end of the second sequence to
/// consider.
///
/// @param ses the resulting shortest editing script.
///
/// @return true upon successful completion, false otherwise.
template<typename RandomAccessOutputIterator>
void
compute_diff(RandomAccessOutputIterator a_begin,
RandomAccessOutputIterator a_end,
RandomAccessOutputIterator b_begin,
RandomAccessOutputIterator b_end,
edit_script& ses)
{
compute_diff<RandomAccessOutputIterator, default_eq_functor>(a_begin, a_end,
b_begin, b_end,
ses);
}
void
compute_lcs(const char* str1, const char* str2, int &ses_len, string& lcs);
void
compute_ses(const char* str1, const char* str2, edit_script& ses);
/// Display an edit script on standard output.
///
/// @param es the edit script to display
///
/// @param str1_base the first string the edit script is about.
///
/// @pram str2_base the second string the edit script is about.
template<typename RandomAccessOutputIterator>
void
display_edit_script(const edit_script& es,
const RandomAccessOutputIterator str1_base,
const RandomAccessOutputIterator str2_base,
ostream& out)
{
if (es.num_deletions() == 0)
out << "no deletion:\n";
else if (es.num_deletions() == 1)
{
out << "1 deletion:\n"
<< "\t happened at index: ";
}
else
{
out << es.num_deletions() << " deletions:\n"
<< "\t happened at indexes: ";
}
for (vector<deletion>::const_iterator i = es.deletions().begin();
i != es.deletions().end();
++i)
{
if (i != es.deletions().begin())
out << ", ";
out << i->index() << " (" << str1_base[i->index()] << ")";
}
out << "\n\n";
if (es.num_insertions() == 0)
out << "no insertion\n";
else if (es.num_insertions() == 1)
out << "1 insertion\n";
else
out << es.num_insertions() << " insertions:\n";
for (vector<insertion>::const_iterator i = es.insertions().begin();
i != es.insertions().end();
++i)
{
int idx = i->insertion_point_index();
if (idx < 0)
out << "\t before index of first sequence: " << idx + 1
<< " (" << str1_base[idx + 1] << ")\n";
else
out << "\t after index of first sequence: " << idx
<< " (" << str1_base[idx] << ")\n";
if (!i->inserted_indexes().empty())
out << "\t\t inserted indexes from second sequence: ";
for (vector<unsigned>::const_iterator j = i->inserted_indexes().begin();
j != i->inserted_indexes().end();
++j)
{
if (j != i->inserted_indexes().begin())
out << ", ";
out << *j << " (" << str2_base[*j] << ")";
}
out << "\n";
}
out << "\n\n";
}
}//end namespace diff_utils
}//end namespace abigail
#endif // __ABG_DIFF_UTILS_H__