// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
*   Copyright (C) 2001-2009, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*   Date        Name        Description
*   05/23/00    aliu        Creation.
**********************************************************************
*/
|
|
|
|
#include <algorithm>
|
|
#include <vector>
|
|
#include "unicode/utypes.h"
|
|
#include "unicode/edits.h"
|
|
#include "unicode/unistr.h"
|
|
#include "unicode/utf16.h"
|
|
#include "cmemory.h"
|
|
#include "testutil.h"
|
|
#include "intltest.h"
|
|
|
|
// Uppercase hexadecimal digit characters; the functions below index this
// table with a 4-bit nibble value (0..15).
static const UChar HEX[] = u"0123456789ABCDEF";
|
|
|
|
/**
 * Appends the uppercase hexadecimal digits of ch to buf.
 * Four digits are always written; a fifth is added when ch >= 0x10000
 * and a sixth when ch >= 0x100000.
 *
 * @param buf string that receives the digits
 * @param ch  value to format
 * @return buf, to allow chaining
 */
UnicodeString &TestUtility::appendHex(UnicodeString &buf, UChar32 ch) {
    // Bit position of the most significant nibble that gets printed.
    int32_t topShift = 12;
    if (ch >= 0x100000) {
        topShift = 20;
    } else if (ch >= 0x10000) {
        topShift = 16;
    }
    // Emit nibbles from most to least significant.
    for (int32_t shift = topShift; shift >= 0; shift -= 4) {
        buf.append(HEX[(ch >> shift) & 0xF]);
    }
    return buf;
}
|
|
|
|
/**
 * Formats a single value as hexadecimal digits.
 *
 * @param ch value to format
 * @return a new string holding what appendHex() writes for ch
 */
UnicodeString TestUtility::hex(UChar32 ch) {
    UnicodeString digits;
    appendHex(digits, ch);
    return digits;
}
|
|
|
|
/**
 * Formats every code point of s as hexadecimal, separated by commas.
 * Convenience overload that forwards to hex(s, sep) with u','.
 *
 * @param s string to format
 * @return the comma-separated hex representation
 */
UnicodeString TestUtility::hex(const UnicodeString& s) {
    const UChar comma = u',';
    return hex(s, comma);
}
|
|
|
|
/**
 * Formats every code point of s as hexadecimal (via appendHex()),
 * inserting sep between consecutive code points.
 *
 * @param s   string to format
 * @param sep separator placed before every code point except the first
 * @return the formatted string; empty when s is empty
 */
UnicodeString TestUtility::hex(const UnicodeString& s, UChar sep) {
    UnicodeString out;
    const int32_t length = s.length();
    int32_t pos = 0;
    while (pos < length) {
        const UChar32 cp = s.char32At(pos);
        if (pos != 0) {
            out.append(sep);
        }
        appendHex(out, cp);
        // Advance by the number of UTF-16 code units this code point occupies.
        pos += U16_LENGTH(cp);
    }
    return out;
}
|
|
|
|
/**
 * Formats a byte array as two uppercase hexadecimal digits per byte,
 * with no separators.
 *
 * @param bytes first byte to format
 * @param len   number of bytes to format; nothing is read when len <= 0
 * @return the concatenated hex digits
 */
UnicodeString TestUtility::hex(const uint8_t* bytes, int32_t len) {
    UnicodeString out;
    for (int32_t remaining = len; remaining > 0; --remaining, ++bytes) {
        const uint8_t b = *bytes;
        out.append(HEX[(b >> 4) & 0x0F]);  // high nibble
        out.append(HEX[b & 0x0F]);         // low nibble
    }
    return out;
}
|
|
|
|
namespace {
|
|
|
|
/**
 * Renders the edit the iterator currently reports as text:
 * "<oldLength>-><newLength>" when hasChange() is true,
 * "<oldLength>=<newLength>" otherwise.
 */
UnicodeString printOneEdit(const Edits::Iterator &ei) {
    const UChar *const relation = ei.hasChange() ? u"->" : u"=";
    return UnicodeString() + ei.oldLength() + relation + ei.newLength();
}
|
|
|
|
/**
|
|
* Maps indexes according to the expected edits.
|
|
* A destination index can occur multiple times when there are source deletions.
|
|
* Map according to the last occurrence, normally in a non-empty destination span.
|
|
* Simplest is to search from the back.
|
|
*/
|
|
int32_t srcIndexFromDest(const EditChange expected[], int32_t expLength,
|
|
int32_t srcLength, int32_t destLength, int32_t index) {
|
|
int32_t srcIndex = srcLength;
|
|
int32_t destIndex = destLength;
|
|
int32_t i = expLength;
|
|
while (index < destIndex && i > 0) {
|
|
--i;
|
|
int32_t prevSrcIndex = srcIndex - expected[i].oldLength;
|
|
int32_t prevDestIndex = destIndex - expected[i].newLength;
|
|
if (index == prevDestIndex) {
|
|
return prevSrcIndex;
|
|
} else if (index > prevDestIndex) {
|
|
if (expected[i].change) {
|
|
// In a change span, map to its end.
|
|
return srcIndex;
|
|
} else {
|
|
// In an unchanged span, offset within it.
|
|
return prevSrcIndex + (index - prevDestIndex);
|
|
}
|
|
}
|
|
srcIndex = prevSrcIndex;
|
|
destIndex = prevDestIndex;
|
|
}
|
|
// index is outside the string.
|
|
return srcIndex;
|
|
}
|
|
|
|
int32_t destIndexFromSrc(const EditChange expected[], int32_t expLength,
|
|
int32_t srcLength, int32_t destLength, int32_t index) {
|
|
int32_t srcIndex = srcLength;
|
|
int32_t destIndex = destLength;
|
|
int32_t i = expLength;
|
|
while (index < srcIndex && i > 0) {
|
|
--i;
|
|
int32_t prevSrcIndex = srcIndex - expected[i].oldLength;
|
|
int32_t prevDestIndex = destIndex - expected[i].newLength;
|
|
if (index == prevSrcIndex) {
|
|
return prevDestIndex;
|
|
} else if (index > prevSrcIndex) {
|
|
if (expected[i].change) {
|
|
// In a change span, map to its end.
|
|
return destIndex;
|
|
} else {
|
|
// In an unchanged span, offset within it.
|
|
return prevDestIndex + (index - prevSrcIndex);
|
|
}
|
|
}
|
|
srcIndex = prevSrcIndex;
|
|
destIndex = prevDestIndex;
|
|
}
|
|
// index is outside the string.
|
|
return destIndex;
|
|
}
|
|
|
|
} // namespace
|
|
|
|
// For debugging, set -v to see matching edits up to a failure.
|
|
UBool TestUtility::checkEqualEdits(IntlTest &test, const UnicodeString &name,
|
|
const Edits &e1, const Edits &e2, UErrorCode &errorCode) {
|
|
Edits::Iterator ei1 = e1.getFineIterator();
|
|
Edits::Iterator ei2 = e2.getFineIterator();
|
|
UBool ok = TRUE;
|
|
for (int32_t i = 0; ok; ++i) {
|
|
UBool ei1HasNext = ei1.next(errorCode);
|
|
UBool ei2HasNext = ei2.next(errorCode);
|
|
ok &= test.assertEquals(name + u" next()[" + i + u"]" + __LINE__,
|
|
ei1HasNext, ei2HasNext);
|
|
ok &= test.assertSuccess(name + u" errorCode[" + i + u"]" + __LINE__, errorCode);
|
|
ok &= test.assertEquals(name + u" edit[" + i + u"]" + __LINE__,
|
|
printOneEdit(ei1), printOneEdit(ei2));
|
|
if (!ei1HasNext || !ei2HasNext) {
|
|
break;
|
|
}
|
|
test.logln();
|
|
}
|
|
return ok;
|
|
}
|
|
|
|
/**
 * Verifies an Edits iterator against a list of expected edit spans.
 *
 * ei1 is stepped forward with next() and compared span by span; ei2 is used
 * for the findSourceIndex()/findDestinationIndex() lookups and, at the end,
 * for the index-mapping functions, whose results are compared with the
 * reference implementations destIndexFromSrc() and srcIndexFromDest().
 *
 * @param test          test object that records the assertions
 * @param name          prefix for the assertion messages
 * @param ei1           iterator checked via next()
 * @param ei2           iterator checked via the find and mapping functions
 *                      (the two iterators are expected to be equal)
 * @param expected      expected edit spans, in text order
 * @param expLength     number of entries in expected
 * @param withUnchanged whether ei1 is expected to report unchanged spans too
 * @param errorCode     ICU error code passed to the iterator functions
 */
void TestUtility::checkEditsIter(
        IntlTest &test,
        const UnicodeString &name,
        Edits::Iterator ei1, Edits::Iterator ei2,  // two equal iterators
        const EditChange expected[], int32_t expLength, UBool withUnchanged,
        UErrorCode &errorCode) {
    // Looking up index -1 is expected to fail.
    test.assertFalse(name + u":" + __LINE__, ei2.findSourceIndex(-1, errorCode));
    test.assertFalse(name + u":" + __LINE__, ei2.findDestinationIndex(-1, errorCode));

    // Running positions at the start of the current expected span.
    int32_t srcPos = 0;
    int32_t destPos = 0;
    int32_t replPos = 0;
    for (int32_t k = 0; k < expLength; ++k) {
        const EditChange &span = expected[k];
        const UnicodeString label = UnicodeString(name).append(u' ') + k;

        // Sequential iteration: ei1 reports this span unless it is an
        // unchanged span and unchanged spans are not expected.
        if (withUnchanged || span.change) {
            test.assertTrue(label + u":" + __LINE__, ei1.next(errorCode));
            test.assertEquals(label + u":" + __LINE__, span.change, ei1.hasChange());
            test.assertEquals(label + u":" + __LINE__, span.oldLength, ei1.oldLength());
            test.assertEquals(label + u":" + __LINE__, span.newLength, ei1.newLength());
            test.assertEquals(label + u":" + __LINE__, srcPos, ei1.sourceIndex());
            test.assertEquals(label + u":" + __LINE__, destPos, ei1.destinationIndex());
            test.assertEquals(label + u":" + __LINE__, replPos, ei1.replacementIndex());
        }

        // Lookup by source index (only for spans that cover source text).
        if (span.oldLength > 0) {
            test.assertTrue(label + u":" + __LINE__, ei2.findSourceIndex(srcPos, errorCode));
            test.assertEquals(label + u":" + __LINE__, span.change, ei2.hasChange());
            test.assertEquals(label + u":" + __LINE__, span.oldLength, ei2.oldLength());
            test.assertEquals(label + u":" + __LINE__, span.newLength, ei2.newLength());
            test.assertEquals(label + u":" + __LINE__, srcPos, ei2.sourceIndex());
            test.assertEquals(label + u":" + __LINE__, destPos, ei2.destinationIndex());
            test.assertEquals(label + u":" + __LINE__, replPos, ei2.replacementIndex());
            if (!withUnchanged) {
                // For some iterators, step past the current range so that
                // a following find...Index() call has to look before the
                // iterator's current index.
                ei2.next(errorCode);
                ei2.next(errorCode);
            }
        }

        // Lookup by destination index (only for spans that produce text).
        if (span.newLength > 0) {
            test.assertTrue(label + u":" + __LINE__, ei2.findDestinationIndex(destPos, errorCode));
            test.assertEquals(label + u":" + __LINE__, span.change, ei2.hasChange());
            test.assertEquals(label + u":" + __LINE__, span.oldLength, ei2.oldLength());
            test.assertEquals(label + u":" + __LINE__, span.newLength, ei2.newLength());
            test.assertEquals(label + u":" + __LINE__, srcPos, ei2.sourceIndex());
            test.assertEquals(label + u":" + __LINE__, destPos, ei2.destinationIndex());
            test.assertEquals(label + u":" + __LINE__, replPos, ei2.replacementIndex());
            if (!withUnchanged) {
                // For some iterators, step past the current range so that
                // a following find...Index() call has to look before the
                // iterator's current index.
                ei2.next(errorCode);
                ei2.next(errorCode);
            }
        }

        srcPos += span.oldLength;
        destPos += span.newLength;
        if (span.change) {
            // Only changed spans contribute to the replacement index.
            replPos += span.newLength;
        }
    }

    // After the last expected span the iterator must be exhausted and
    // report an empty, unchanged edit at the end positions.
    const UnicodeString endLabel = UnicodeString(name).append(u" end");
    test.assertFalse(endLabel + u":" + __LINE__, ei1.next(errorCode));
    test.assertFalse(endLabel + u":" + __LINE__, ei1.hasChange());
    test.assertEquals(endLabel + u":" + __LINE__, 0, ei1.oldLength());
    test.assertEquals(endLabel + u":" + __LINE__, 0, ei1.newLength());
    test.assertEquals(endLabel + u":" + __LINE__, srcPos, ei1.sourceIndex());
    test.assertEquals(endLabel + u":" + __LINE__, destPos, ei1.destinationIndex());
    test.assertEquals(endLabel + u":" + __LINE__, replPos, ei1.replacementIndex());

    // Lookups at the end-of-text positions are expected to fail.
    test.assertFalse(name + u":" + __LINE__, ei2.findSourceIndex(srcPos, errorCode));
    test.assertFalse(name + u":" + __LINE__, ei2.findDestinationIndex(destPos, errorCode));

    // Compare the index mapping functions with the simple reference
    // implementations that work directly on the expected changes.
    // The source samples are visited front to back and the destination
    // samples back to front, to cover more runtime conditions.
    const int32_t srcLength = srcPos;
    const int32_t destLength = destPos;

    // Adds the sample indexes for one span: its first index, its second
    // index (if any) and its last index (if distinct from those two).
    auto addSpanSamples = [](std::vector<int32_t> &samples, int32_t start, int32_t length) {
        if (length <= 0) {
            return;
        }
        samples.push_back(start);
        if (length == 1) {
            return;
        }
        samples.push_back(start + 1);
        if (length > 2) {
            samples.push_back(start + length - 1);
        }
    };

    std::vector<int32_t> srcSamples;
    std::vector<int32_t> destSamples;
    // Out-of-range sample before the text.
    srcSamples.push_back(-1);
    destSamples.push_back(-1);
    int32_t srcStart = 0;
    int32_t destStart = 0;
    for (int32_t k = 0; k < expLength; ++k) {
        addSpanSamples(srcSamples, srcStart, expected[k].oldLength);
        addSpanSamples(destSamples, destStart, expected[k].newLength);
        srcStart += expected[k].oldLength;
        destStart += expected[k].newLength;
    }
    // End-of-text sample and one out-of-range sample past the end.
    srcSamples.push_back(srcLength);
    destSamples.push_back(destLength);
    srcSamples.push_back(srcLength + 1);
    destSamples.push_back(destLength + 1);
    std::reverse(destSamples.begin(), destSamples.end());

    // Zig-zag across the indexes to stress next() <-> previous().
    static const int32_t ZIG_ZAG[] = { 0, 1, 2, 3, 2, 1 };

    const int32_t srcCount = static_cast<int32_t>(srcSamples.size());
    for (int32_t base = 0; base < srcCount; ++base) {
        for (int32_t offset : ZIG_ZAG) {
            const int32_t pos = base + offset;
            if (pos >= srcCount) {
                continue;
            }
            const int32_t si = srcSamples[pos];
            test.assertEquals(name + u" destIndexFromSrc(" + si + u"):" + __LINE__,
                              destIndexFromSrc(expected, expLength, srcLength, destLength, si),
                              ei2.destinationIndexFromSourceIndex(si, errorCode));
        }
    }

    const int32_t destCount = static_cast<int32_t>(destSamples.size());
    for (int32_t base = 0; base < destCount; ++base) {
        for (int32_t offset : ZIG_ZAG) {
            const int32_t pos = base + offset;
            if (pos >= destCount) {
                continue;
            }
            const int32_t di = destSamples[pos];
            test.assertEquals(name + u" srcIndexFromDest(" + di + u"):" + __LINE__,
                              srcIndexFromDest(expected, expLength, srcLength, destLength, di),
                              ei2.sourceIndexFromDestinationIndex(di, errorCode));
        }
    }
}
|