You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

284 lines
8.2 KiB

// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
* Copyright (C) 2003-2014, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: nptrans.cpp
* encoding: UTF-8
* tab size: 8 (not used)
* indentation:4
*
* created on: 2003feb1
* created by: Ram Viswanadha
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_TRANSLITERATION
#if !UCONFIG_NO_IDNA
#include "nptrans.h"
#include "unicode/resbund.h"
#include "unicode/uniset.h"
#include "sprpimpl.h"
#include "cmemory.h"
#include "ustr_imp.h"
#include "intltest.h"
#ifdef NPTRANS_DEBUG
#include <stdio.h>
#endif
// Out-of-class definition of the static member fgClassID, initialized to 0.
// NOTE(review): presumably only the address of this byte is meaningful (as a
// unique class identifier) and the value itself is never read -- confirm
// against the class declaration.
const char NamePrepTransform::fgClassID=0;
// Factory method.
// Heap-allocates a NamePrepTransform, forwarding parseError and status to the
// constructor. Returns the new object when status reports success; otherwise
// the partially built object is destroyed and a null pointer is returned.
// The caller owns the returned object.
NamePrepTransform* NamePrepTransform::createInstance(UParseError& parseError, UErrorCode& status){
    NamePrepTransform* instance = new NamePrepTransform(parseError, status);
    if(U_SUCCESS(status)){
        return instance;
    }
    // construction failed: do not hand out a half-initialized object
    delete instance;
    return nullptr;
}
// Constructor.
// Opens the "idna_rules" resource bundle from the test data and builds from it:
//   - the mapping transliterator ("MapNoNormalization" rules followed by "MapNFKC" rules),
//   - the unassigned set      ("UnassignedSet"),
//   - the prohibited set      ("ProhibitedSet"),
//   - the label separator set ("LabelSeparatorSet").
// The transliterator and the bundle are held in local owning pointers while
// being built and are handed over to the members only when status still
// reports success at the end, so a failed construction leaves mapping and
// bundle null.
// parseError receives rule parse diagnostics from Transliterator::createFromRules.
NamePrepTransform::NamePrepTransform(UParseError& parseError, UErrorCode& status)
: mapping(nullptr), unassigned(), prohibited(), labelSeparatorSet(), bundle(nullptr) {
    LocalPointer<Transliterator> ownedMapping;
    LocalUResourceBundlePointer ownedBundle;

    const char* dataPath = IntlTest::loadTestData(status);
    if(U_FAILURE(status)){
        return;
    }

    ownedBundle.adoptInstead(ures_openDirect(dataPath, "idna_rules", &status));

    if(ownedBundle.isValid() && U_SUCCESS(status)){
        UResourceBundle* rb = ownedBundle.getAlias();

        // create the mapping transliterator: the no-normalization rules come first,
        // the NFKC rules are appended after them
        int32_t nfkcLen = 0;
        const UChar* nfkcRules = ures_getStringByKey(rb, "MapNFKC", &nfkcLen, &status);
        int32_t noNormLen = 0;
        const UChar* noNormRules = ures_getStringByKey(rb, "MapNoNormalization", &noNormLen, &status);
        UnicodeString rule(noNormRules, noNormLen);
        rule.append(nfkcRules, nfkcLen);

        ownedMapping.adoptInstead(
            Transliterator::createFromRules(UnicodeString("NamePrepTransform", ""), rule,
                                            UTRANS_FORWARD, parseError, status));
        if(U_FAILURE(status)){
            return;
        }

        // create the unassigned set
        int32_t setLen = 0;
        const UChar* setPattern = ures_getStringByKey(rb, "UnassignedSet", &setLen, &status);
        unassigned.applyPattern(UnicodeString(setPattern, setLen), status);

        // create the prohibited set
        setLen = 0;
        setPattern = ures_getStringByKey(rb, "ProhibitedSet", &setLen, &status);
        UnicodeString prohibitedPattern(setPattern, setLen);
        prohibited.applyPattern(prohibitedPattern, status);
#ifdef NPTRANS_DEBUG
        if(U_FAILURE(status)){
            printf("Construction of Unicode set failed\n");
        }else{
            if(prohibited.contains((UChar) 0x644)){
                printf("The string contains 0x644 ... !!\n");
            }
            // dump the prohibited set pattern, one narrowed char per code unit
            UnicodeString dump;
            prohibited.toPattern(dump,TRUE);
            for(int32_t pos=0; pos<dump.length(); pos++){
                printf("%c", (char)dump.charAt(pos));
            }
            printf("\n");
        }
#endif
        // create the label separator set
        setLen = 0;
        setPattern = ures_getStringByKey(rb, "LabelSeparatorSet", &setLen, &status);
        labelSeparatorSet.applyPattern(UnicodeString(setPattern, setLen), status);
    }

    // no error was reported and yet there is no transliterator
    if(U_SUCCESS(status) && ownedMapping.isNull()){
        status = U_MEMORY_ALLOCATION_ERROR;
    }

    // commit ownership to the members only on overall success
    if(U_SUCCESS(status)){
        mapping = ownedMapping.orphan();
        bundle = ownedBundle.orphan();
    }
}
// Reports whether ch is a prohibited code point.
// A code point is prohibited when it is a member of the prohibited set loaded
// by the constructor; the ASCII space is always exempt. This is the same rule
// process() applies to every code point of the mapped string.
// (The previous implementation never consulted the prohibited set and
// reported every code point other than the space as prohibited.)
UBool NamePrepTransform::isProhibited(UChar32 ch){
    return (UBool)(ch != ASCII_SPACE && prohibited.contains(ch));
}
// Destructor: destroys the mapping transliterator and closes the resource
// bundle, clearing both members afterwards.
NamePrepTransform::~NamePrepTransform(){
    // the transliterator goes first
    delete mapping;
    mapping = nullptr;

    // then close the bundle
    ures_close(bundle);
    bundle = nullptr;
}
// Runs src through the mapping transliterator and writes the result to dest.
//
// src             input text; must not be NULL
// srcLength       number of UChars in src, or -1 (passed on to the UnicodeString constructor)
// dest            output buffer; may be NULL only when destCapacity is 0
// destCapacity    capacity of dest in UChars
// allowUnassigned when false, any mapped code point contained in the
//                 unassigned set fails with U_IDNA_UNASSIGNED_ERROR
// (parseError)    unused
// status          in/out error code; nothing is done if it already reports failure
//
// Returns the length of the mapped text, or 0 on failure. The final status
// is set by u_terminateUChars() from the mapped length and destCapacity.
//
// The mapped text is copied whenever it fits into dest, including the case
// where it fills dest exactly and cannot be NUL-terminated. Previously the
// exact-fit case skipped the copy while the status was only the
// U_STRING_NOT_TERMINATED_WARNING success code, leaving dest unwritten.
int32_t NamePrepTransform::map(const UChar* src, int32_t srcLength,
                               UChar* dest, int32_t destCapacity,
                               UBool allowUnassigned,
                               UParseError* /*parseError*/,
                               UErrorCode& status ){
    if(U_FAILURE(status)){
        return 0;
    }
    //check arguments
    if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
        status=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    UnicodeString rsource(src,srcLength);
    // map the code points
    // transliteration also performs NFKC
    mapping->transliterate(rsource);

    const UChar* buffer = rsource.getBuffer();
    const int32_t bufLen = rsource.length();

    // check if unassigned
    if(!allowUnassigned){
        for(int32_t bufIndex=0; bufIndex<bufLen; ){
            UChar32 ch = 0;
            U16_NEXT(buffer, bufIndex, bufLen, ch);
            if(unassigned.contains(ch)){
                status = U_IDNA_UNASSIGNED_ERROR;
                return 0;
            }
        }
    }

    // copy whenever the text itself fits; termination is handled separately below
    if(bufLen > 0 && bufLen <= destCapacity){
        u_memcpy(dest, buffer, bufLen);
    }
    return u_terminateUChars(dest, destCapacity, bufLen, &status);
}
// Initial capacity (in UChars) of the scratch buffer used by process().
#define MAX_BUFFER_SIZE 300

// Maps src with map(), then validates the mapped text and copies it to dest.
//
// Steps:
//   1. map() into a scratch UnicodeString buffer; if the result did not fit
//      (or fit without room for a terminator) map again into a buffer that
//      is one UChar larger than the reported length.
//   2. Fail with U_IDNA_PROHIBITED_ERROR if any mapped code point other than
//      U+0020 is contained in the prohibited set.
//   3. Bidi checks, failing with U_IDNA_CHECK_BIDI_ERROR when
//      - the text contains both a left-to-right character and a
//        right-to-left (or right-to-left Arabic) character, or
//      - the text contains a right-to-left character but its first and last
//        characters are not both right-to-left.
//   4. Copy the mapped text to dest if it fits.
//
// src             input text; must not be NULL
// srcLength       number of UChars in src, or -1
// dest            output buffer; may be NULL only when destCapacity is 0
// destCapacity    capacity of dest in UChars
// allowUnassigned forwarded to map()
// parseError      forwarded to map()
// status          in/out error code; nothing is done if it already reports failure
//
// Returns the length of the processed text, or 0 on any failure. On success
// the final status is set by u_terminateUChars() from that length and
// destCapacity.
int32_t NamePrepTransform::process( const UChar* src, int32_t srcLength,
                                    UChar* dest, int32_t destCapacity,
                                    UBool allowUnassigned,
                                    UParseError* parseError,
                                    UErrorCode& status ){
    // check error status
    if(U_FAILURE(status)){
        return 0;
    }
    //check arguments
    if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
        status=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    UnicodeString b1String;
    UChar *b1 = b1String.getBuffer(MAX_BUFFER_SIZE);
    if(b1 == NULL){
        status = U_MEMORY_ALLOCATION_ERROR;
        return 0;
    }
    int32_t b1Len = map(src, srcLength, b1, b1String.getCapacity(), allowUnassigned, parseError, status);

    // U_STRING_NOT_TERMINATED_WARNING is a success code, but it means the
    // mapped text filled the scratch buffer exactly; treat it like an
    // overflow so the retry always has room for the text plus a terminator.
    if(status == U_BUFFER_OVERFLOW_ERROR || status == U_STRING_NOT_TERMINATED_WARNING){
        // redo processing of string
        /* we do not have enough room so grow the buffer*/
        b1String.releaseBuffer(0);
        b1 = b1String.getBuffer(b1Len + 1);
        if(b1 == NULL){
            status = U_MEMORY_ALLOCATION_ERROR;
            return 0;
        }
        status = U_ZERO_ERROR; // reset error
        b1Len = map(src, srcLength, b1, b1String.getCapacity(), allowUnassigned, parseError, status);
    }

    if(U_FAILURE(status)){
        b1String.releaseBuffer(0);
        return 0;
    }
    b1String.releaseBuffer(b1Len);

    // read the mapped text back through the const accessor now that the
    // writable buffer has been released
    const UChar* mapped = b1String.getBuffer();

    UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
    UBool leftToRight=FALSE, rightToLeft=FALSE;

    for(int32_t b1Index = 0; b1Index<b1Len; ){
        UChar32 ch = 0;
        U16_NEXT(mapped, b1Index, b1Len, ch);

        if(prohibited.contains(ch) && ch!=0x0020){
            status = U_IDNA_PROHIBITED_ERROR;
            return 0;
        }

        // after the loop, direction holds the direction of the last character
        direction = u_charDirection(ch);
        if(firstCharDir==U_CHAR_DIRECTION_COUNT){
            firstCharDir = direction;
        }
        if(direction == U_LEFT_TO_RIGHT){
            leftToRight = TRUE;
        }
        if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
            rightToLeft = TRUE;
        }
    }

    // satisfy 2: left-to-right and right-to-left characters must not be mixed
    if(leftToRight && rightToLeft){
        status = U_IDNA_CHECK_BIDI_ERROR;
        return 0;
    }

    // satisfy 3: with any right-to-left character present, both the first
    // and the last character must be right-to-left
    if(rightToLeft &&
        !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
          (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
       ){
        status = U_IDNA_CHECK_BIDI_ERROR;
        return 0;
    }

    if(b1Len <= destCapacity){
        u_memmove(dest, mapped, b1Len);
    }
    return u_terminateUChars(dest, destCapacity, b1Len, &status);
}
// Reports whether ch is contained in the label separator set loaded by the
// constructor. Always returns FALSE when status already reports a failure;
// status itself is never modified.
UBool NamePrepTransform::isLabelSeparator(UChar32 ch, UErrorCode& status){
    if(U_SUCCESS(status)){
        return labelSeparatorSet.contains(ch);
    }
    // incoming error: answer negatively without consulting the set
    return FALSE;
}
#endif /* #if !UCONFIG_NO_IDNA */
#endif /* #if !UCONFIG_NO_TRANSLITERATION */