You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

329 lines
9.4 KiB

// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
* Copyright (c) 2002-2014, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Author: Alan Liu
* Created: October 30 2002
* Since: ICU 2.4
* 2010nov19 Markus Scherer Rewrite for formatVersion 2.
**********************************************************************
*/
#include "propname.h"
#include "unicode/uchar.h"
#include "unicode/udata.h"
#include "unicode/uscript.h"
#include "umutex.h"
#include "cmemory.h"
#include "cstring.h"
#include "uarrsort.h"
#include "uinvchar.h"
#define INCLUDED_FROM_PROPNAME_CPP
#include "propname_data.h"
U_CDECL_BEGIN
/**
 * Fetches the next significant (non-ignorable) ASCII character of a
 * property name, lowercased.
 *
 * Skipped characters are the delimiters '-' (0x2d) and '_' (0x5f),
 * the space (0x20), and the ASCII White_Space controls 0x09..0x0d.
 *
 * @param name NUL-terminated property name, positioned at the next unread char
 * @return ((number of chars consumed from name)<<8)|character;
 *         the low byte is 0 when the terminating NUL was reached
 */
static inline int32_t
getASCIIPropertyNameChar(const char *name) {
    int32_t consumed=0;
    char ch;
    /* Consume chars until one is found that is not an ignorable delimiter. */
    do {
        ch=name[consumed];
        ++consumed;
    } while(ch==0x2d || ch==0x5f ||
            ch==0x20 || (0x09<=ch && ch<=0x0d));

    /* The advance count always goes into the upper bits. */
    int32_t result=consumed<<8;
    if(ch!=0) {
        /* A real character: put its lowercase form into the low byte. */
        result|=(uint8_t)uprv_asciitolower((char)ch);
    }
    return result;
}
/**
 * Fetches the next significant (non-ignorable) EBCDIC character of a
 * property name, lowercased.
 *
 * Skipped characters are the delimiters '-' and '_' and EBCDIC White_Space,
 * expressed here as the code unit values 0x60, 0x6d, 0x40, 0x05, 0x15,
 * 0x25, 0x0b, 0x0c and 0x0d.
 *
 * @param name NUL-terminated property name, positioned at the next unread char
 * @return ((number of chars consumed from name)<<8)|character;
 *         the low byte is 0 when the terminating NUL was reached
 */
static inline int32_t
getEBCDICPropertyNameChar(const char *name) {
    int32_t consumed=0;
    char ch;
    bool ignorable;
    /* Consume chars until one is found that is not an ignorable delimiter. */
    do {
        ch=name[consumed];
        ++consumed;
        switch(ch) {
        case 0x60:
        case 0x6d:
        case 0x40:
        case 0x05:
        case 0x15:
        case 0x25:
        case 0x0b:
        case 0x0c:
        case 0x0d:
            ignorable=true;
            break;
        default:
            ignorable=false;
            break;
        }
    } while(ignorable);

    /* The advance count always goes into the upper bits. */
    int32_t result=consumed<<8;
    if(ch!=0) {
        /* A real character: put its lowercase form into the low byte. */
        result|=(uint8_t)uprv_ebcdictolower((char)ch);
    }
    return result;
}
/**
 * Loosely compares two property names or property value names
 * given as ASCII (char *) strings.
 *
 * Unicode property names and property value names are matched "loosely".
 * UCD.html 4.0.1 states that for all property names, property value names,
 * and for property values of Enumerated, Binary, or Catalog properties,
 * the following loose matching rule is to be used:
 *
 * LM3. Ignore case, whitespace, underscore ('_'), and hyphens.
 *
 * This function implements exactly that rule.
 * It is almost identical to ucnv_compareNames() but additionally ignores
 * C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC).
 *
 * @param name1 first NUL-terminated name
 * @param name2 second NUL-terminated name
 * @return 0 if the names match loosely; otherwise the difference between
 *         the first pair of differing lowercased characters
 *         (a name that ends first contributes 0)
 * @internal
 */
U_CAPI int32_t U_EXPORT2
uprv_compareASCIIPropertyNames(const char *name1, const char *name2) {
    for(;;) {
        /* Each result packs (advance<<8)|lowercased char. */
        const int32_t next1=getASCIIPropertyNameChar(name1);
        const int32_t next2=getASCIIPropertyNameChar(name2);
        const int32_t char1=next1&0xff;
        const int32_t char2=next2&0xff;

        /* Both names are exhausted at the same time: they match. */
        if(char1==0 && char2==0) {
            return 0;
        }
        /* The significant characters differ (or only one name has ended). */
        if(char1!=char2) {
            return char1-char2;
        }
        /* Same character; the advance counts may differ if delimiters were skipped. */
        name1+=next1>>8;
        name2+=next2>>8;
    }
}
/**
 * Loosely compares two property names or property value names
 * given as EBCDIC (char *) strings.
 *
 * The comparison works on the characters delivered by
 * getEBCDICPropertyNameChar(), that is, lowercased and with
 * '-', '_' and EBCDIC White_Space skipped.
 *
 * @param name1 first NUL-terminated name
 * @param name2 second NUL-terminated name
 * @return 0 if the names match loosely; otherwise the difference between
 *         the first pair of differing lowercased characters
 *         (a name that ends first contributes 0)
 * @internal
 */
U_CAPI int32_t U_EXPORT2
uprv_compareEBCDICPropertyNames(const char *name1, const char *name2) {
    for(;;) {
        /* Each result packs (advance<<8)|lowercased char. */
        const int32_t next1=getEBCDICPropertyNameChar(name1);
        const int32_t next2=getEBCDICPropertyNameChar(name2);
        const int32_t char1=next1&0xff;
        const int32_t char2=next2&0xff;

        /* Both names are exhausted at the same time: they match. */
        if(char1==0 && char2==0) {
            return 0;
        }
        /* The significant characters differ (or only one name has ended). */
        if(char1!=char2) {
            return char1-char2;
        }
        /* Same character; the advance counts may differ if delimiters were skipped. */
        name1+=next1>>8;
        name2+=next2>>8;
    }
}
U_CDECL_END
U_NAMESPACE_BEGIN
/**
 * Locates the valueMaps entry for a property.
 *
 * valueMaps[0] holds the number of property ranges. Each range consists of
 * a start and a limit word followed by two words per property in
 * [start, limit). The scan stops early once a range starts above the
 * requested property.
 *
 * @param property the property enum value to look up
 * @return the valueMaps index of the property's two-word entry,
 *         or 0 if the property is not covered by any range
 */
int32_t PropNameData::findProperty(int32_t property) {
    int32_t rangesLeft=valueMaps[0];
    int32_t pos=1;  // first word after the range count
    while(rangesLeft-->0) {
        // Read the range header; pos then points at the range's first entry.
        const int32_t start=valueMaps[pos++];
        const int32_t limit=valueMaps[pos++];
        if(property<start) {
            return 0;  // No later range is examined once one starts above property.
        }
        if(property<limit) {
            return pos+2*(property-start);
        }
        // Step over the two-word entries of every property in this range.
        pos+=2*(limit-start);
    }
    return 0;
}
/**
 * Finds the name group offset for one value of a property.
 *
 * The value map starts with a BytesTrie offset word, followed by a header
 * word. A header below 0x10 is a count of [start, limit) ranges, each
 * followed by one name group offset per value. Otherwise (header-0x10) is
 * the length of a list of values which is followed by an equally long list
 * of name group offsets; the lookup exits as soon as it meets a list value
 * greater than the requested one.
 *
 * @param valueMapIndex valueMaps index of the property's value map,
 *                      or 0 if the property has no named values
 * @param value the property value to look up
 * @return the name group offset for the value, or 0 if there is none
 */
int32_t PropNameData::findPropertyValueNameGroup(int32_t valueMapIndex, int32_t value) {
    if(valueMapIndex==0) {
        return 0;  // The property does not have named values.
    }
    // valueMaps[valueMapIndex] is the BytesTrie offset; the header word comes next.
    int32_t pos=valueMapIndex+1;
    const int32_t header=valueMaps[pos++];

    if(header>=0x10) {
        // List of values, followed by the parallel list of name group offsets.
        const int32_t valuesStart=pos;
        const int32_t offsetsStart=valuesStart+(header-0x10);
        int32_t cursor=valuesStart;
        do {
            const int32_t candidate=valueMaps[cursor];
            if(value<candidate) {
                return 0;  // Passed the place where value would have been.
            }
            if(value==candidate) {
                return valueMaps[offsetsStart+(cursor-valuesStart)];
            }
        } while(++cursor<offsetsStart);
        return 0;
    }

    // Ranges of values.
    for(int32_t rangesLeft=header; rangesLeft>0; --rangesLeft) {
        // Read the range header; pos then points at the range's first offset.
        const int32_t start=valueMaps[pos];
        const int32_t limit=valueMaps[pos+1];
        pos+=2;
        if(value<start) {
            break;
        }
        if(value<limit) {
            return valueMaps[pos+(value-start)];
        }
        pos+=limit-start;  // Step over this range's offsets.
    }
    return 0;
}
/**
 * Picks one name out of a name group.
 *
 * A name group starts with a count byte, followed by that many
 * NUL-terminated names stored back to back.
 *
 * @param nameGroup pointer to the count byte of the name group
 * @param nameIndex zero-based index of the requested name
 * @return the requested name, or NULL if nameIndex is out of range
 *         or the selected name is empty
 */
const char *PropNameData::getName(const char *nameGroup, int32_t nameIndex) {
    const int32_t count=*nameGroup;
    if(nameIndex<0 || nameIndex>=count) {
        return NULL;
    }
    // Walk from the first name over nameIndex terminators.
    const char *name=nameGroup+1;
    for(int32_t skipped=0; skipped<nameIndex; ++skipped) {
        name=uprv_strchr(name, 0)+1;
    }
    // An empty string means no name (Property[Value]Aliases.txt has "n/a").
    return *name!=0 ? name : NULL;
}
/**
 * Walks a BytesTrie with the loosely-normalized form of a name.
 *
 * Each character is converted with uprv_invCharToLowercaseAscii();
 * '-', '_', space and the ASCII White_Space controls 0x09..0x0d are skipped.
 * On success the trie is left positioned on the matched name.
 *
 * @param trie the trie to walk; its state is advanced by this call
 * @param name NUL-terminated name, may be NULL
 * @return whether the walk ended on a trie state that has a value;
 *         FALSE for a NULL name or when the trie cannot continue
 */
UBool PropNameData::containsName(BytesTrie &trie, const char *name) {
    if(name==NULL) {
        return FALSE;
    }
    UStringTrieResult result=USTRINGTRIE_NO_VALUE;
    for(const char *p=name; *p!=0; ++p) {
        const char c=uprv_invCharToLowercaseAscii(*p);
        // Delimiters '-', '_', and ASCII White_Space do not take part in matching.
        const bool isDelimiter=
            c==0x2d || c==0x5f || c==0x20 || (0x09<=c && c<=0x0d);
        if(isDelimiter) {
            continue;
        }
        // There is a significant character left but the trie has no continuation.
        if(!USTRINGTRIE_HAS_NEXT(result)) {
            return FALSE;
        }
        result=trie.next((uint8_t)c);
    }
    return USTRINGTRIE_HAS_VALUE(result);
}
const char *PropNameData::getPropertyName(int32_t property, int32_t nameChoice) {
int32_t valueMapIndex=findProperty(property);
if(valueMapIndex==0) {
return NULL; // Not a known property.
}
return getName(nameGroups+valueMaps[valueMapIndex], nameChoice);
}
const char *PropNameData::getPropertyValueName(int32_t property, int32_t value, int32_t nameChoice) {
int32_t valueMapIndex=findProperty(property);
if(valueMapIndex==0) {
return NULL; // Not a known property.
}
int32_t nameGroupOffset=findPropertyValueNameGroup(valueMaps[valueMapIndex+1], value);
if(nameGroupOffset==0) {
return NULL;
}
return getName(nameGroups+nameGroupOffset, nameChoice);
}
/**
 * Looks up an alias in the BytesTrie found at the given offset.
 *
 * @param bytesTrieOffset offset of the trie inside bytesTries
 * @param alias the name to look up, matched via containsName()
 * @return the value stored in the trie for the alias,
 *         or UCHAR_INVALID_CODE if the trie does not contain it
 */
int32_t PropNameData::getPropertyOrValueEnum(int32_t bytesTrieOffset, const char *alias) {
    BytesTrie trie(bytesTries+bytesTrieOffset);
    int32_t result=UCHAR_INVALID_CODE;
    if(containsName(trie, alias)) {
        // containsName() left the trie on the matching entry.
        result=trie.getValue();
    }
    return result;
}
/**
 * Maps a property alias to its property enum value.
 *
 * @param alias the property name to look up
 * @return the result of looking up alias in the trie at bytesTries offset 0;
 *         UCHAR_INVALID_CODE if that trie does not contain the alias
 */
int32_t PropNameData::getPropertyEnum(const char *alias) {
    // The trie used for property names is the one at offset 0.
    const int32_t propertyNamesTrieOffset=0;
    return getPropertyOrValueEnum(propertyNamesTrieOffset, alias);
}
int32_t PropNameData::getPropertyValueEnum(int32_t property, const char *alias) {
int32_t valueMapIndex=findProperty(property);
if(valueMapIndex==0) {
return UCHAR_INVALID_CODE; // Not a known property.
}
valueMapIndex=valueMaps[valueMapIndex+1];
if(valueMapIndex==0) {
return UCHAR_INVALID_CODE; // The property does not have named values.
}
// valueMapIndex is the start of the property's valueMap,
// where the first word is the BytesTrie offset.
return getPropertyOrValueEnum(valueMaps[valueMapIndex], alias);
}
U_NAMESPACE_END
//----------------------------------------------------------------------
// Public API implementation
/**
 * Public C API: returns a name of a Unicode property.
 * Forwards to PropNameData::getPropertyName().
 *
 * @param property the property to name
 * @param nameChoice selects which of the property's names to return
 * @return the name, or NULL if PropNameData has none for this request
 */
U_CAPI const char* U_EXPORT2
u_getPropertyName(UProperty property,
                  UPropertyNameChoice nameChoice) {
    U_NAMESPACE_USE
    const char *name=PropNameData::getPropertyName(property, nameChoice);
    return name;
}
/**
 * Public C API: returns the property enum value for a property name.
 * Forwards to PropNameData::getPropertyEnum().
 *
 * @param alias the property name to look up
 * @return the lookup result converted to UProperty;
 *         UCHAR_INVALID_CODE if the name is not found
 */
U_CAPI UProperty U_EXPORT2
u_getPropertyEnum(const char* alias) {
    U_NAMESPACE_USE
    const int32_t propertyCode=PropNameData::getPropertyEnum(alias);
    return (UProperty)propertyCode;
}
/**
 * Public C API: returns a name of a value of a Unicode property.
 * Forwards to PropNameData::getPropertyValueName().
 *
 * @param property the property the value belongs to
 * @param value the property value to name
 * @param nameChoice selects which of the value's names to return
 * @return the name, or NULL if PropNameData has none for this request
 */
U_CAPI const char* U_EXPORT2
u_getPropertyValueName(UProperty property,
                       int32_t value,
                       UPropertyNameChoice nameChoice) {
    U_NAMESPACE_USE
    const char *name=PropNameData::getPropertyValueName(property, value, nameChoice);
    return name;
}
/**
 * Public C API: returns the value of a property for a property value name.
 * Forwards to PropNameData::getPropertyValueEnum().
 *
 * @param property the property the value name belongs to
 * @param alias the property value name to look up
 * @return the value, or UCHAR_INVALID_CODE if the lookup fails
 */
U_CAPI int32_t U_EXPORT2
u_getPropertyValueEnum(UProperty property,
                       const char* alias) {
    U_NAMESPACE_USE
    const int32_t valueCode=PropNameData::getPropertyValueEnum(property, alias);
    return valueCode;
}
/**
 * Returns the long name of a script, i.e. the U_LONG_PROPERTY_NAME
 * of the given value of the UCHAR_SCRIPT property.
 *
 * @param scriptCode the script whose name is requested
 * @return whatever u_getPropertyValueName() returns for this script
 */
U_CAPI const char* U_EXPORT2
uscript_getName(UScriptCode scriptCode){
    const char *longName=
        u_getPropertyValueName(UCHAR_SCRIPT, scriptCode, U_LONG_PROPERTY_NAME);
    return longName;
}
/**
 * Returns the short name of a script, i.e. the U_SHORT_PROPERTY_NAME
 * of the given value of the UCHAR_SCRIPT property.
 *
 * @param scriptCode the script whose short name is requested
 * @return whatever u_getPropertyValueName() returns for this script
 */
U_CAPI const char* U_EXPORT2
uscript_getShortName(UScriptCode scriptCode){
    const char *shortName=
        u_getPropertyValueName(UCHAR_SCRIPT, scriptCode, U_SHORT_PROPERTY_NAME);
    return shortName;
}