You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

794 lines
21 KiB

/*
* Copyright 2018 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#ifndef textParser_DEFINED
#define textParser_DEFINED
#include <functional>
#include "bookmaker.h"
class BmhParser;
class Definition;
class TextParser : public NonAssignable {
TextParser() {} // only for ParserCommon, TextParserSave
friend class ParserCommon;
friend class TextParserSave;
public:
virtual ~TextParser() {}
TextParser(string fileName, const char* start, const char* end, int lineCount)
: fFileName(fileName)
, fStart(start)
, fLine(start)
, fChar(start)
, fEnd(end)
, fLineCount(lineCount)
{
}
TextParser(const Definition* );
const char* anyOf(const char* str) const {
const char* ptr = fChar;
while (ptr < fEnd) {
if (strchr(str, ptr[0])) {
return ptr;
}
++ptr;
}
return nullptr;
}
const char* anyOf(const char* wordStart, const char* wordList[], size_t wordListCount) const {
const char** wordPtr = wordList;
const char** wordEnd = wordPtr + wordListCount;
const size_t matchLen = fChar - wordStart;
while (wordPtr < wordEnd) {
const char* word = *wordPtr++;
if (strlen(word) == matchLen && !strncmp(wordStart, word, matchLen)) {
return word;
}
}
return nullptr;
}
// words must be alpha only
string anyWord(const vector<string>& wordList, int spaces) const {
const char* matchStart = fChar;
do {
int count = spaces;
while (matchStart < fEnd && !isalpha(matchStart[0])) {
++matchStart;
}
const char* matchEnd = matchStart;
const char* nextWord = nullptr;
while (matchEnd < fEnd) {
if (isalpha(matchEnd[0])) {
} else if (' ' == matchEnd[0] && --count >= 0) {
if (!nextWord) {
nextWord = matchEnd;
}
} else {
break;
}
++matchEnd;
}
size_t matchLen = matchEnd - matchStart;
for (auto word : wordList) {
if (word.length() != matchLen) {
continue;
}
for (unsigned index = 0; index < matchLen; ++index) {
if (tolower(matchStart[index]) != word[index]) {
goto nextWord;
}
}
return word;
nextWord: ;
}
matchStart = nextWord ? nextWord : matchEnd;
} while (matchStart < fEnd);
return "";
}
bool back(const char* pattern) {
size_t len = strlen(pattern);
const char* start = fChar - len;
if (start <= fStart) {
return false;
}
if (strncmp(start, pattern, len)) {
return false;
}
fChar = start;
return true;
}
char backup(const char* pattern) const {
size_t len = strlen(pattern);
const char* start = fChar - len;
if (start <= fStart) {
return '\0';
}
if (strncmp(start, pattern, len)) {
return '\0';
}
return start[-1];
}
void backupWord() {
while (fChar > fStart && isalpha(fChar[-1])) {
--fChar;
}
}
bool contains(const char* match, const char* lineEnd, const char** loc) const {
const char* result = this->strnstr(match, lineEnd);
if (loc) {
*loc = result;
}
return result;
}
bool containsWord(const char* match, const char* lineEnd, const char** loc) {
size_t len = strlen(match);
do {
const char* result = this->strnstr(match, lineEnd);
if (!result) {
return false;
}
if ((result > fStart && isalnum(result[-1])) || (result + len < fEnd
&& isalnum(result[len]))) {
fChar = result + len;
continue;
}
if (loc) {
*loc = result;
}
return true;
} while (true);
}
// either /n/n or /n# will stop parsing a typedef
const char* doubleLF() const {
const char* ptr = fChar - 1;
const char* doubleStart = nullptr;
while (++ptr < fEnd) {
if (!doubleStart) {
if ('\n' == ptr[0]) {
doubleStart = ptr;
}
continue;
}
if ('\n' == ptr[0] || '#' == ptr[0]) {
return doubleStart;
}
if (' ' < ptr[0]) {
doubleStart = nullptr;
}
}
return nullptr;
}
bool endsWith(const char* match) {
int matchLen = strlen(match);
if (matchLen > fChar - fLine) {
return false;
}
return !strncmp(fChar - matchLen, match, matchLen);
}
bool eof() const { return fChar >= fEnd; }
const char* lineEnd() const {
const char* ptr = fChar;
do {
if (ptr >= fEnd) {
return ptr;
}
char test = *ptr++;
if (test == '\n' || test == '\0') {
break;
}
} while (true);
return ptr;
}
ptrdiff_t lineLength() const {
return this->lineEnd() - fLine;
}
bool match(TextParser* );
char next() {
SkASSERT(fChar < fEnd);
char result = *fChar++;
if ('\n' == result) {
++fLineCount;
fLine = fChar;
}
return result;
}
char peek() const { SkASSERT(fChar < fEnd); return *fChar; }
void restorePlace(const TextParser& save) {
fChar = save.fChar;
fLine = save.fLine;
fLineCount = save.fLineCount;
}
void savePlace(TextParser* save) {
save->fChar = fChar;
save->fLine = fLine;
save->fLineCount = fLineCount;
}
void reportError(const char* errorStr) const;
static string ReportFilename(string file);
void reportWarning(const char* errorStr) const;
template <typename T> T reportError(const char* errorStr) const {
this->reportError(errorStr);
return T();
}
bool sentenceEnd(const char* check) const {
while (check > fStart) {
--check;
if (' ' < check[0] && '.' != check[0]) {
return false;
}
if ('.' == check[0]) {
return ' ' >= check[1];
}
if ('\n' == check[0] && '\n' == check[1]) {
return true;
}
}
return true;
}
void setForErrorReporting(const Definition* , const char* );
bool skipToBalancedEndBracket(char startB, char endB) {
SkASSERT(fChar < fEnd);
SkASSERT(startB == fChar[0]);
int startCount = 0;
do {
char test = this->next();
startCount += startB == test;
startCount -= endB == test;
} while (startCount && fChar < fEnd);
return !startCount;
}
bool skipToEndBracket(char endBracket, const char* end = nullptr) {
if (nullptr == end) {
end = fEnd;
}
while (fChar[0] != endBracket) {
if (fChar >= end) {
return false;
}
(void) this->next();
}
return true;
}
bool skipToEndBracket(const char* endBracket) {
size_t len = strlen(endBracket);
while (strncmp(fChar, endBracket, len)) {
if (fChar >= fEnd) {
return false;
}
(void) this->next();
}
return true;
}
bool skipLine() {
return skipToEndBracket('\n');
}
void skipTo(const char* skip) {
while (fChar < skip) {
this->next();
}
}
void skipToAlpha() {
while (fChar < fEnd && !isalpha(fChar[0])) {
fChar++;
}
}
// returns true if saw close brace
bool skipToAlphaNum() {
bool sawCloseBrace = false;
while (fChar < fEnd && !isalnum(fChar[0])) {
sawCloseBrace |= '}' == *fChar++;
}
return sawCloseBrace;
}
bool skipExact(const char* pattern) {
if (!this->startsWith(pattern)) {
return false;
}
this->skipName(pattern);
return true;
}
// differs from skipToNonAlphaNum in that a.b isn't considered a full name,
// since a.b can't be found as a named definition
void skipFullName() {
do {
char last = '\0';
while (fChar < fEnd && (isalnum(fChar[0])
|| '_' == fChar[0] /* || '-' == fChar[0] */
|| (':' == fChar[0] && fChar + 1 < fEnd && ':' == fChar[1]))) {
if (':' == fChar[0] && fChar + 1 < fEnd && ':' == fChar[1]) {
fChar++;
}
last = fChar[0];
fChar++;
}
if (fChar + 1 >= fEnd || '/' != fChar[0] || !isalpha(last) || !isalpha(fChar[1])) {
break; // stop unless pattern is xxx/xxx as in I/O
}
fChar++; // skip slash
} while (true);
}
int skipToLineBalance(char open, char close) {
int match = 0;
while (!this->eof() && '\n' != fChar[0]) {
match += open == this->peek();
match -= close == this->next();
}
return match;
}
bool skipToLineStart() {
if (!this->skipLine()) {
return false;
}
if (!this->eof()) {
return this->skipWhiteSpace();
}
return true;
}
void skipToLineStart(int* indent, bool* sawReturn) {
SkAssertResult(this->skipLine());
this->skipWhiteSpace(indent, sawReturn);
}
void skipLower() {
while (fChar < fEnd && (islower(fChar[0]) || '_' == fChar[0])) {
fChar++;
}
}
void skipToNonAlphaNum() {
while (fChar < fEnd && (isalnum(fChar[0]) || '_' == fChar[0])) {
fChar++;
}
}
void skipToNonName() {
while (fChar < fEnd && (isalnum(fChar[0])
|| '_' == fChar[0] || '-' == fChar[0]
|| (':' == fChar[0] && fChar + 1 < fEnd && ':' == fChar[1])
|| ('.' == fChar[0] && fChar + 1 < fEnd && isalpha(fChar[1])))) {
if (':' == fChar[0] && fChar +1 < fEnd && ':' == fChar[1]) {
fChar++;
}
fChar++;
}
}
void skipPhraseName() {
while (fChar < fEnd && (islower(fChar[0]) || '_' == fChar[0])) {
fChar++;
}
}
void skipToSpace() {
while (fChar < fEnd && ' ' != fChar[0]) {
fChar++;
}
}
void skipToWhiteSpace() {
while (fChar < fEnd && ' ' < fChar[0]) {
fChar++;
}
}
bool skipName(const char* word) {
size_t len = strlen(word);
if (len <= (size_t) (fEnd - fChar) && !strncmp(word, fChar, len)) {
for (size_t i = 0; i < len; ++i) {
this->next();
}
}
return this->eof() || ' ' >= fChar[0];
}
bool skipSpace() {
while (' ' == this->peek()) {
(void) this->next();
if (fChar >= fEnd) {
return false;
}
}
return true;
}
bool skipWord(const char* word) {
if (!this->skipWhiteSpace()) {
return false;
}
const char* save = fChar;
if (!this->skipName(word)) {
fChar = save;
return false;
}
if (!this->skipWhiteSpace()) {
return false;
}
return true;
}
bool skipWhiteSpace() {
while (' ' >= this->peek()) {
(void) this->next();
if (fChar >= fEnd) {
return false;
}
}
return true;
}
bool skipWhiteSpace(int* indent, bool* skippedReturn) {
while (' ' >= this->peek()) {
*indent = *skippedReturn ? *indent + 1 : 1;
if ('\n' == this->peek()) {
*skippedReturn |= true;
*indent = 0;
}
(void) this->next();
if (fChar >= fEnd) {
return false;
}
}
return true;
}
bool startsWith(const char* str) const {
size_t len = strlen(str);
ptrdiff_t lineLen = fEnd - fChar;
return len <= (size_t) lineLen && 0 == strncmp(str, fChar, len);
}
// ignores minor white space differences
bool startsWith(const char* str, size_t oLen) const {
size_t tIndex = 0;
size_t tLen = fEnd - fChar;
size_t oIndex = 0;
while (oIndex < oLen && tIndex < tLen) {
bool tSpace = ' ' >= fChar[tIndex];
bool oSpace = ' ' >= str[oIndex];
if (tSpace != oSpace) {
break;
}
if (tSpace) {
do {
++tIndex;
} while (tIndex < tLen && ' ' >= fChar[tIndex]);
do {
++oIndex;
} while (oIndex < oLen && ' ' >= str[oIndex]);
continue;
}
if (fChar[tIndex] != str[oIndex]) {
break;
}
++tIndex;
++oIndex;
}
return oIndex >= oLen;
}
const char* strnchr(char ch, const char* end) const {
const char* ptr = fChar;
while (ptr < end) {
if (ptr[0] == ch) {
return ptr;
}
++ptr;
}
return nullptr;
}
const char* strnstr(const char *match, const char* end) const {
size_t matchLen = strlen(match);
SkASSERT(matchLen > 0);
ptrdiff_t len = end - fChar;
SkASSERT(len >= 0);
if ((size_t) len < matchLen ) {
return nullptr;
}
size_t count = len - matchLen;
for (size_t index = 0; index <= count; index++) {
if (0 == strncmp(&fChar[index], match, matchLen)) {
return &fChar[index];
}
}
return nullptr;
}
const char* trimmedBracketEnd(const char bracket) const {
int max = (int) (this->lineLength());
int index = 0;
while (index < max && bracket != fChar[index]) {
++index;
}
SkASSERT(index < max);
while (index > 0 && ' ' >= fChar[index - 1]) {
--index;
}
return fChar + index;
}
const char* trimmedBracketEnd(string bracket) const {
size_t max = (size_t) (this->lineLength());
string line(fChar, max);
size_t index = line.find(bracket);
SkASSERT(index < max);
while (index > 0 && ' ' >= fChar[index - 1]) {
--index;
}
return fChar + index;
}
const char* trimmedBracketNoEnd(const char bracket) const {
int max = (int) (fEnd - fChar);
int index = 0;
while (index < max && bracket != fChar[index]) {
++index;
}
SkASSERT(index < max);
while (index > 0 && ' ' >= fChar[index - 1]) {
--index;
}
return fChar + index;
}
const char* trimmedLineEnd() const {
const char* result = this->lineEnd();
while (result > fChar && ' ' >= result[-1]) {
--result;
}
return result;
}
void trimEnd() {
while (fEnd > fStart && ' ' >= fEnd[-1]) {
--fEnd;
}
}
// FIXME: nothing else in TextParser knows from C++ --
// there could be a class between TextParser and ParserCommon
virtual string typedefName();
const char* wordEnd() const {
const char* end = fChar;
while (isalnum(end[0]) || '_' == end[0] || '-' == end[0]) {
++end;
}
return end;
}
string fFileName;
const char* fStart;
const char* fLine;
const char* fChar;
const char* fEnd;
size_t fLineCount;
};
class TextParserSave {
public:
TextParserSave(TextParser* parser) {
fParser = parser;
fSave.fFileName = parser->fFileName;
fSave.fStart = parser->fStart;
fSave.fLine = parser->fLine;
fSave.fChar = parser->fChar;
fSave.fEnd = parser->fEnd;
fSave.fLineCount = parser->fLineCount;
}
void restore() const {
fParser->fFileName = fSave.fFileName;
fParser->fStart = fSave.fStart;
fParser->fLine = fSave.fLine;
fParser->fChar = fSave.fChar;
fParser->fEnd = fSave.fEnd;
fParser->fLineCount = fSave.fLineCount;
}
private:
TextParser* fParser;
TextParser fSave;
};
static inline bool has_nonwhitespace(string s) {
bool nonwhite = false;
for (const char& c : s) {
if (' ' < c) {
nonwhite = true;
break;
}
}
return nonwhite;
}
static inline void trim_end(string &s) {
s.erase(std::find_if(s.rbegin(), s.rend(),
std::not1(std::ptr_fun<int, int>(std::isspace))).base(), s.end());
}
static inline void trim_end_spaces(string &s) {
while (s.length() > 0 && ' ' == s.back()) {
s.pop_back();
}
}
static inline void trim_start(string &s) {
s.erase(s.begin(), std::find_if(s.begin(), s.end(),
std::not1(std::ptr_fun<int, int>(std::isspace))));
}
static inline void trim_start_end(string& s) {
trim_start(s);
trim_end(s);
}
static inline string trim_inline_spaces(string s) {
bool lastSpace = false;
string trimmed;
for (const char* ptr = &s.front(); ptr <= &s.back(); ++ptr) {
char c = *ptr;
if (' ' >= c) {
if (!lastSpace) {
trimmed += ' ';
}
lastSpace = true;
continue;
}
lastSpace = false;
trimmed += c;
}
return trimmed;
}
class EscapeParser : public TextParser {
public:
EscapeParser(const char* start, const char* end) :
TextParser("", start, end, 0) {
const char* reader = fStart;
fStorage = new char[end - start];
char* writer = fStorage;
while (reader < fEnd) {
char ch = *reader++;
if (ch != '\\') {
*writer++ = ch;
} else {
char ctrl = *reader++;
if (ctrl == 'u') {
unsigned unicode = 0;
for (int i = 0; i < 4; ++i) {
unicode <<= 4;
SkASSERT((reader[0] >= '0' && reader[0] <= '9') ||
(reader[0] >= 'A' && reader[0] <= 'F') ||
(reader[0] >= 'a' && reader[0] <= 'f'));
int nibble = *reader++ - '0';
if (nibble > 9) {
nibble = (nibble & ~('a' - 'A')) - 'A' + '9' + 1;
}
unicode |= nibble;
}
SkASSERT(unicode < 256);
*writer++ = (unsigned char) unicode;
} else {
SkASSERT(ctrl == 'n');
*writer++ = '\n';
}
}
}
fStart = fLine = fChar = fStorage;
fEnd = writer;
}
~EscapeParser() override {
delete fStorage;
}
private:
char* fStorage;
};
// some methods cannot be trivially parsed; look for class-name / ~ / operator
class MethodParser : public TextParser {
public:
MethodParser(string className, string fileName,
const char* start, const char* end, int lineCount)
: TextParser(fileName, start, end, lineCount)
, fClassName(className) {
size_t doubleColons = className.find_last_of("::");
if (string::npos != doubleColons) {
fLocalName = className.substr(doubleColons + 1);
SkASSERT(fLocalName.length() > 0);
}
}
~MethodParser() override {}
string localName() const {
return fLocalName;
}
void setLocalName(string name) {
if (name == fClassName) {
fLocalName = "";
} else {
fLocalName = name;
}
}
// returns true if close brace was skipped
int skipToMethodStart() {
if (!fClassName.length()) {
return this->skipToAlphaNum();
}
int braceCount = 0;
while (!this->eof() && !isalnum(this->peek()) && '~' != this->peek()) {
braceCount += '{' == this->peek();
braceCount -= '}' == this->peek();
this->next();
}
return braceCount;
}
void skipToMethodEnd(Resolvable resolvable);
bool wordEndsWith(const char* str) const {
const char* space = this->strnchr(' ', fEnd);
if (!space) {
return false;
}
size_t len = strlen(str);
if (space < fChar + len) {
return false;
}
return !strncmp(str, space - len, len);
}
private:
string fClassName;
string fLocalName;
typedef TextParser INHERITED;
};
#endif