You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

111 lines
3.1 KiB

// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
* Copyright (c) 2004-2011, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Author: Alan Liu
* Created: March 22 2004
* Since: ICU 3.0
**********************************************************************
*/
#include "tokiter.h"
#include "textfile.h"
#include "patternprops.h"
#include "util.h"
#include "uprops.h"
/**
 * Construct an iterator that pulls its lines from the given reader.
 * The pointer is stored as-is; nothing is read until next() is called.
 * @param r the TextFile to read lines from
 */
TokenIterator::TokenIterator(TextFile* r) {
    reader = r;

    // No line has been fetched yet and iteration has not finished.
    haveLine = FALSE;
    done = FALSE;

    // -1 marks both positions as "not yet set"; next() assigns them
    // once a line has been read.
    lastpos = -1;
    pos = -1;
}
/**
 * Destructor. Performs no work; in particular it does not delete
 * the reader pointer that was passed to the constructor.
 */
TokenIterator::~TokenIterator() {}
/**
 * Fetch the next token, reading further lines from the reader as needed.
 * 'token' is emptied first and then filled by nextToken().
 * @param token receives the token text
 * @param ec input-output error code; if it already indicates failure
 * on entry, nothing is done
 * @return TRUE if a token was produced; FALSE if iteration has already
 * finished, if reader->readLineSkippingComments() returns false, or
 * if ec indicates a failure
 */
UBool TokenIterator::next(UnicodeString& token, UErrorCode& ec) {
    if (U_FAILURE(ec) || done) {
        return FALSE;
    }

    token.truncate(0);

    for (;;) {
        // Make sure there is a current line to tokenize.
        if (!haveLine) {
            if (!reader->readLineSkippingComments(line, ec)) {
                // Once the reader stops delivering lines, every later
                // call returns FALSE immediately.
                done = TRUE;
                return FALSE;
            }
            pos = 0;
            haveLine = TRUE;
        }

        // Remember where this token attempt began.
        lastpos = pos;

        if (nextToken(token, ec)) {
            return TRUE;
        }

        // The current line yielded nothing more: drop it, then either
        // report the error or move on to the following line.
        haveLine = FALSE;
        if (U_FAILURE(ec)) {
            return FALSE;
        }
    }
}
/**
 * Report the line number as tracked by the underlying reader.
 * @return whatever reader->getLineNumber() returns
 */
int32_t TokenIterator::getLineNumber() const {
    const int32_t lineNumber = reader->getLineNumber();
    return lineNumber;
}
/**
 * Read the next token from 'this->line', starting at 'this->pos', and
 * append it to 'token'. Tokens are separated by Pattern_White_Space.
 * Tokens may also be delimited by double or single quotes. The closing
 * quote must match the opening quote. If a '#' is encountered, the rest
 * of the line is ignored, unless it is backslash-escaped or within
 * quotes. Backslash escapes are decoded with UnicodeString::unescapeAt(),
 * both inside and outside quotes, including at the very start of a token.
 *
 * On return 'this->pos' has been advanced past the consumed text. When
 * an unquoted token is ended by '#', pos is left on the '#' so that the
 * following call sees it and reports end of line.
 *
 * @param token the token is appended to this UnicodeString
 * @param ec input-output error code; set to U_MALFORMED_UNICODE_ESCAPE
 * if a backslash escape cannot be decoded, or to U_UNTERMINATED_QUOTE
 * if the line ends inside a quoted token
 * @return TRUE if a valid token is found, or FALSE if the end
 * of the line is reached or an error occurs
 */
UBool TokenIterator::nextToken(UnicodeString& token, UErrorCode& ec) {
    // Code points are spelled numerically, as in the rest of this file,
    // so every comparison against a UChar uses the Unicode value rather
    // than a narrow character literal.
    const UChar DOUBLE_QUOTE = 34; /* '"'  */
    const UChar NUMBER_SIGN  = 35; /* '#'  */
    const UChar APOSTROPHE   = 39; /* '\'' */
    const UChar BACKSLASH    = 92; /* '\\' */

    ICU_Utility::skipWhitespace(line, pos, TRUE);
    if (pos == line.length()) {
        return FALSE;
    }

    // Classify the first character of the token.
    UChar c = line.charAt(pos);
    UChar quote = 0;
    if (c == DOUBLE_QUOTE || c == APOSTROPHE) {
        // Opening quote: remember it and consume it.
        quote = c;
        ++pos;
    } else if (c == NUMBER_SIGN) {
        // Comment start: nothing more on this line.
        ++pos;
        return FALSE;
    } else if (c != BACKSLASH) {
        // Ordinary first character.
        token.append(c);
        ++pos;
    }
    // A leading backslash is deliberately left unconsumed so that the
    // loop below decodes it like any other escape.

    while (pos < line.length()) {
        c = line.charAt(pos); // 16-bit ok
        if (c == BACKSLASH) {
            // unescapeAt() expects the offset of the character that
            // FOLLOWS the backslash, and leaves the offset untouched on
            // failure. Work on a copy so pos stays on the backslash if
            // the escape is malformed.
            int32_t escapePos = pos + 1;
            UChar32 c32 = line.unescapeAt(escapePos);
            if (c32 < 0) {
                ec = U_MALFORMED_UNICODE_ESCAPE;
                return FALSE;
            }
            token.append(c32);
            pos = escapePos;
        } else if ((quote != 0 && c == quote) ||
                   (quote == 0 && PatternProps::isWhiteSpace(c))) {
            // Closing quote, or white space ending an unquoted token:
            // consume the delimiter and finish.
            ++pos;
            return TRUE;
        } else if (quote == 0 && c == NUMBER_SIGN) {
            return TRUE; // do NOT increment
        } else {
            token.append(c);
            ++pos;
        }
    }

    // End of line reached. That is fine for an unquoted token, but an
    // open quote was never closed.
    if (quote != 0) {
        ec = U_UNTERMINATED_QUOTE;
        return FALSE;
    }
    return TRUE;
}