You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
249 lines
7.2 KiB
249 lines
7.2 KiB
// © 2016 and later: Unicode, Inc. and others.
|
|
// License & terms of use: http://www.unicode.org/copyright.html
|
|
/*
|
|
*******************************************************************************
|
|
* Copyright (C) 2003-2014, International Business Machines
|
|
* Corporation and others. All Rights Reserved.
|
|
*******************************************************************************
|
|
*
|
|
* File prscmnts.cpp
|
|
*
|
|
* Modification History:
|
|
*
|
|
* Date Name Description
|
|
* 08/22/2003 ram Creation.
|
|
*******************************************************************************
|
|
*/
|
|
|
|
// Safer use of UnicodeString.
|
|
#ifndef UNISTR_FROM_CHAR_EXPLICIT
|
|
# define UNISTR_FROM_CHAR_EXPLICIT explicit
|
|
#endif
|
|
|
|
// Less important, but still a good idea.
|
|
#ifndef UNISTR_FROM_STRING_EXPLICIT
|
|
# define UNISTR_FROM_STRING_EXPLICIT explicit
|
|
#endif
|
|
|
|
#include "unicode/regex.h"
|
|
#include "unicode/unistr.h"
|
|
#include "unicode/parseerr.h"
|
|
#include "prscmnts.h"
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
|
|
U_NAMESPACE_USE
|
|
|
|
#if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* do not compile when regular expressions are not available */
|
|
|
|
#define MAX_SPLIT_STRINGS 20
|
|
|
|
/*
 * Regular expressions used to recognise tagged pieces of a comment.
 * The table has UPC_LIMIT entries and is indexed with a UParseCommentsOption
 * value. Each expression is anchored at the start of a piece of text and
 * captures whatever follows the tag name in group 1.
 */
const char *patternStrings[UPC_LIMIT] = {
    "^translate\\s*(.*)",
    "^note\\s*(.*)"
};
|
|
|
|
/**
 * Replaces every match of a regular expression in a UTF-16 buffer and writes
 * the result back into the same buffer.
 *
 * @param source      Buffer holding the text; it is overwritten with the result.
 * @param srcLen      Number of UChars read from source. The same value is used
 *                    as the capacity when the result is written back, so the
 *                    result is expected to be no longer than the input.
 * @param patString   Regular expression to search for.
 * @param options     Flags handed to the RegexMatcher constructor.
 * @param replaceText Text substituted for each match.
 * @param status      In/out error code. Nothing is done when it is NULL or
 *                    already indicates a failure.
 * @return The value returned by UnicodeString::extract for the replaced text,
 *         or 0 when status is NULL/failed or the matcher cannot be created.
 */
U_CFUNC int32_t
removeText(UChar *source, int32_t srcLen,
           UnicodeString patString, uint32_t options,
           UnicodeString replaceText, UErrorCode *status) {
    if (status == NULL || U_FAILURE(*status)) {
        return 0;
    }

    // The matcher works on a copy of the caller's text.
    UnicodeString input(source, srcLen);
    RegexMatcher matcher(patString, input, options, *status);
    if (U_FAILURE(*status)) {
        return 0;
    }

    // Copy the substituted text back over the caller's buffer.
    UnicodeString replaced = matcher.replaceAll(replaceText, *status);
    return replaced.extract(source, srcLen, *status);
}
|
|
/**
 * Strips leading and trailing white space from a UTF-16 buffer in place.
 *
 * @param src    Buffer to trim; modified in place.
 * @param srcLen Number of UChars in src.
 * @param status In/out error code, forwarded to removeText().
 * @return The length returned by the last removeText() pass.
 */
U_CFUNC int32_t
trim(UChar *src, int32_t srcLen, UErrorCode *status) {
    // Applied in this order; each pass replaces its match with an empty string.
    static const char *const passes[] = {
        "^[ \\r\\n]+ ",   // remove leading new lines
        "^\\s+",          // remove leading spaces
        "\\s+$"           // remove trailing spaces
    };
    const int32_t passCount = (int32_t)(sizeof(passes) / sizeof(passes[0]));

    int32_t len = srcLen;
    for (int32_t p = 0; p < passCount; ++p) {
        len = removeText(src, len, UnicodeString(passes[p]), 0, UnicodeString(), status);
    }
    return len;
}
|
|
|
|
/**
 * Cleans up the text of a comment in place: trims it, removes the " * "
 * style decoration at the beginning of each line, and replaces every run of
 * spaces, carriage returns and line feeds with a single space.
 *
 * @param source Buffer holding the comment text; modified in place.
 * @param srcLen Number of UChars in source.
 * @param status In/out error code, forwarded to trim() and removeText().
 * @return The length returned by the final removeText() call.
 */
U_CFUNC int32_t
removeCmtText(UChar* source, int32_t srcLen, UErrorCode* status) {
    int32_t len = trim(source, srcLen, status);

    // UREGEX_MULTILINE lets '^' match at the start of every line, so the
    // star decoration is removed from each line and not only from the first.
    const UnicodeString lineDecoration("^\\s*?\\*\\s*?");
    len = removeText(source, len, lineDecoration, UREGEX_MULTILINE, UnicodeString(), status);

    // Join the lines: each run of space / CR / LF becomes one space.
    const UnicodeString lineBreakRun("[ \\r\\n]+");
    const UnicodeString singleSpace(" ");
    return removeText(source, len, lineBreakRun, 0, singleSpace, status);
}
|
|
|
|
/**
 * Splits the source text at every '@' and returns capture group 1 of the
 * first piece that patternString matches at its beginning.
 *
 * @param source        Text to examine.
 * @param srcLen        Number of UChars in source.
 * @param dest          Pointer to the output buffer pointer; the match is
 *                      extracted into *dest.
 * @param destCapacity  Capacity of *dest in UChars.
 * @param patternString Regular expression applied (with UREGEX_DOTALL) to each
 *                      piece; it must define capture group 1.
 * @param status        In/out error code. Nothing is done when it is NULL or
 *                      already indicates a failure.
 * @return The value returned by UnicodeString::extract for the captured text,
 *         or 0 when nothing matched or an error occurred.
 */
U_CFUNC int32_t
getText(const UChar* source, int32_t srcLen,
        UChar** dest, int32_t destCapacity,
        UnicodeString patternString,
        UErrorCode* status) {
    if (status == NULL || U_FAILURE(*status)) {
        return 0;
    }

    UnicodeString stringArray[MAX_SPLIT_STRINGS];
    RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), 0, *status);
    if (U_FAILURE(*status)) {
        // Deleting a null pointer is a no-op, so this is safe whatever
        // compile() returned on failure.
        delete pattern;
        return 0;
    }

    UnicodeString src(source, srcLen);
    pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
    // The compiled pattern is owned by this function and is only needed for
    // the split; release it here so that no return path below leaks it.
    delete pattern;

    RegexMatcher matcher(patternString, UREGEX_DOTALL, *status);
    if (U_FAILURE(*status)) {
        return 0;
    }
    for (int32_t i = 0; i < MAX_SPLIT_STRINGS; i++) {
        matcher.reset(stringArray[i]);
        if (matcher.lookingAt(*status)) {
            // The first matching piece wins.
            UnicodeString out = matcher.group(1, *status);
            return out.extract(*dest, destCapacity, *status);
        }
    }
    return 0;
}
|
|
|
|
|
|
#define AT_SIGN 0x0040
|
|
|
|
/**
 * Extracts the part of the source text that precedes the first '@' sign,
 * copies it into *dest and trims it.
 *
 * @param source       Text to examine.
 * @param srcLen       Number of UChars in source.
 * @param dest         Pointer to the output buffer pointer; the text is
 *                     extracted into *dest and trimmed there.
 * @param destCapacity Capacity of *dest in UChars.
 * @param status       In/out error code. Nothing is done when it is NULL or
 *                     already indicates a failure.
 * @return The length returned by trim(), or 0 when an error occurred or the
 *         first piece still contains an '@'.
 */
U_CFUNC int32_t
getDescription( const UChar* source, int32_t srcLen,
                UChar** dest, int32_t destCapacity,
                UErrorCode* status) {
    if (status == NULL || U_FAILURE(*status)) {
        return 0;
    }

    UnicodeString stringArray[MAX_SPLIT_STRINGS];
    RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status);
    if (U_FAILURE(*status)) {
        // Deleting a null pointer is a no-op, so this is safe whatever
        // compile() returned on failure.
        delete pattern;
        return 0;
    }

    UnicodeString src(source, srcLen);
    pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
    // The compiled pattern is owned by this function and is no longer needed
    // after the split; release it so that it is not leaked.
    delete pattern;

    // Only the first piece (the text in front of the first '@') is used.
    if (stringArray[0].indexOf((UChar)AT_SIGN) == -1) {
        int32_t destLen = stringArray[0].extract(*dest, destCapacity, *status);
        return trim(*dest, destLen, status);
    }
    return 0;
}
|
|
|
|
/**
 * Counts the pieces of the source text (split at '@') that start with the
 * tag selected by option.
 *
 * @param source Text to examine.
 * @param srcLen Number of UChars in source.
 * @param option Selects the entry of patternStrings to match against. A value
 *               outside the table sets *status to U_ILLEGAL_ARGUMENT_ERROR.
 * @param status In/out error code. Nothing is done when it is NULL or already
 *               indicates a failure.
 * @return Number of matching pieces, or 0 on error.
 *
 * Note: when option is UPC_TRANSLATE and more than one piece matches, a
 * message is printed to stderr and the process exits with
 * U_UNSUPPORTED_ERROR.
 */
U_CFUNC int32_t
getCount(const UChar* source, int32_t srcLen,
         UParseCommentsOption option, UErrorCode *status) {
    if (status == NULL || U_FAILURE(*status)) {
        return 0;
    }
    // patternStrings has UPC_LIMIT entries; refuse anything that would index
    // outside it (the unsigned comparison also rejects negative values).
    if (static_cast<uint32_t>(option) >= static_cast<uint32_t>(UPC_LIMIT)) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    UnicodeString stringArray[MAX_SPLIT_STRINGS];
    RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status);
    if (U_FAILURE(*status)) {
        // Deleting a null pointer is a no-op, so this is safe whatever
        // compile() returned on failure.
        delete pattern;
        return 0;
    }

    UnicodeString src(source, srcLen);
    int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
    // The compiled pattern is owned by this function and is only needed for
    // the split; release it here so that no path below leaks it.
    delete pattern;

    UnicodeString patternString(patternStrings[option]);
    RegexMatcher matcher(patternString, UREGEX_DOTALL, *status);
    if (U_FAILURE(*status)) {
        return 0;
    }

    int32_t count = 0;
    for (int32_t i = 0; i < retLen; i++) {
        matcher.reset(stringArray[i]);
        if (matcher.lookingAt(*status)) {
            count++;
        }
    }

    if (option == UPC_TRANSLATE && count > 1) {
        fprintf(stderr, "Multiple @translate tags cannot be supported.\n");
        exit(U_UNSUPPORTED_ERROR);
    }
    return count;
}
|
|
|
|
/**
 * Returns the text of the index-th piece of the source (split at '@') that
 * starts with the tag selected by option.
 *
 * @param source       Text to examine.
 * @param srcLen       Number of UChars in source.
 * @param dest         Pointer to the output buffer pointer; capture group 1
 *                     of the selected piece is extracted into *dest.
 * @param destCapacity Capacity of *dest in UChars.
 * @param index        Zero-based position among the matching pieces.
 * @param option       Selects the entry of patternStrings to match against. A
 *                     value outside the table sets *status to
 *                     U_ILLEGAL_ARGUMENT_ERROR.
 * @param status       In/out error code. Nothing is done when it is NULL or
 *                     already indicates a failure.
 * @return The value returned by UnicodeString::extract for the selected
 *         piece, or 0 when there is no such piece or an error occurred.
 */
U_CFUNC int32_t
getAt(const UChar* source, int32_t srcLen,
      UChar** dest, int32_t destCapacity,
      int32_t index,
      UParseCommentsOption option,
      UErrorCode* status) {
    if (status == NULL || U_FAILURE(*status)) {
        return 0;
    }
    // patternStrings has UPC_LIMIT entries; refuse anything that would index
    // outside it (the unsigned comparison also rejects negative values).
    if (static_cast<uint32_t>(option) >= static_cast<uint32_t>(UPC_LIMIT)) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    UnicodeString stringArray[MAX_SPLIT_STRINGS];
    RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status);
    if (U_FAILURE(*status)) {
        // Deleting a null pointer is a no-op, so this is safe whatever
        // compile() returned on failure.
        delete pattern;
        return 0;
    }

    UnicodeString src(source, srcLen);
    int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
    // The compiled pattern is owned by this function and is only needed for
    // the split; release it here so that no return path below leaks it.
    delete pattern;

    UnicodeString patternString(patternStrings[option]);
    RegexMatcher matcher(patternString, UREGEX_DOTALL, *status);
    if (U_FAILURE(*status)) {
        return 0;
    }

    int32_t count = 0;
    for (int32_t i = 0; i < retLen; i++) {
        matcher.reset(stringArray[i]);
        if (matcher.lookingAt(*status)) {
            if (count == index) {
                UnicodeString out = matcher.group(1, *status);
                return out.extract(*dest, destCapacity, *status);
            }
            count++;
        }
    }
    return 0;
}
|
|
|
|
/**
 * Finds the first piece of the source that starts with "translate" (via
 * getText()), copies the text following the tag into *dest and trims it.
 *
 * @param source       Text to examine.
 * @param srcLen       Number of UChars in source.
 * @param dest         Pointer to the output buffer pointer; the result is
 *                     written to *dest.
 * @param destCapacity Capacity of *dest in UChars.
 * @param status       In/out error code, forwarded to getText() and trim().
 * @return The length returned by trim().
 */
U_CFUNC int32_t
getTranslate( const UChar* source, int32_t srcLen,
              UChar** dest, int32_t destCapacity,
              UErrorCode* status) {
    const UnicodeString translatePattern("^translate\\s*?(.*)");
    const int32_t rawLen = getText(source, srcLen, dest, destCapacity, translatePattern, status);
    return trim(*dest, rawLen, status);
}
|
|
|
|
/**
 * Finds the first piece of the source that starts with "note" (via
 * getText()), copies the text following the tag into *dest and trims it.
 *
 * @param source       Text to examine.
 * @param srcLen       Number of UChars in source.
 * @param dest         Pointer to the output buffer pointer; the result is
 *                     written to *dest.
 * @param destCapacity Capacity of *dest in UChars.
 * @param status       In/out error code, forwarded to getText() and trim().
 * @return The length returned by trim().
 */
U_CFUNC int32_t
getNote(const UChar* source, int32_t srcLen,
        UChar** dest, int32_t destCapacity,
        UErrorCode* status) {
    const UnicodeString notePattern("^note\\s*?(.*)");
    const int32_t rawLen = getText(source, srcLen, dest, destCapacity, notePattern, status);
    return trim(*dest, rawLen, status);
}
|
|
|
|
#endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */
|
|
|