You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

280 lines
9.6 KiB

//===--- Parsing.cpp - Parsing function implementations ---------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "clang/Tooling/Transformer/Parsing.h"
#include "clang/AST/Expr.h"
#include "clang/ASTMatchers/ASTMatchFinder.h"
#include "clang/Basic/CharInfo.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Lex/Lexer.h"
#include "clang/Tooling/Transformer/RangeSelector.h"
#include "clang/Tooling/Transformer/SourceCode.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
#include <string>
#include <utility>
#include <vector>
using namespace clang;
using namespace transformer;
// FIXME: This implementation is entirely separate from that of the AST
// matchers. Given the similarity of the languages and uses of the two parsers,
// the two should share a common parsing infrastructure, as should other
// Transformer types. We intend to unify this implementation soon to share as
// much as possible with the AST Matchers parsing.
namespace {
// Bring the LLVM error-handling types into scope for the definitions in this
// anonymous namespace.
using llvm::Error;
using llvm::Expected;
// Pointer to a function that builds a `RangeSelector` from arguments of types
// `Ts...`. Used as the value type of the selector lookup tables in this file.
template <typename... Ts> using RangeSelectorOp = RangeSelector (*)(Ts...);
// Position of the parser within its input: the text still to be consumed plus
// the complete original string.
struct ParseState {
// The remaining input to be processed.
StringRef Input;
// The original input. Not modified during parsing; only for reference in
// error reporting.
StringRef OriginalInput;
};
// Represents an intermediate result returned by a parsing function: the parser
// state after the step, paired with the value that step produced. Functions
// that don't generate values should use `llvm::NoneType` as `ResultType` and
// `llvm::None` as the value.
template <typename ResultType> struct ParseProgress {
// Parser state following the step that produced `Value`.
ParseState State;
// Intermediate result generated by the Parser.
ResultType Value;
};
// Outcome of a parsing step: either a `ParseProgress<T>` or an error.
template <typename T> using ExpectedProgress = llvm::Expected<ParseProgress<T>>;
// Pointer to a parsing function that takes a `ParseState` by value and returns
// an `ExpectedProgress<T>`.
template <typename T> using ParseFunction = ExpectedProgress<T> (*)(ParseState);
// Error type reported when parsing fails. Carries the position of the failure,
// a descriptive message and a short excerpt of the input.
class ParseError : public llvm::ErrorInfo<ParseError> {
public:
  // Required field for all ErrorInfo derivatives.
  static char ID;

  ParseError(size_t Pos, std::string ErrorMsg, std::string InputExcerpt)
      : Pos(Pos), ErrorMsg(std::move(ErrorMsg)),
        Excerpt(std::move(InputExcerpt)) {}

  // Writes "parse error at position (<Pos>): <ErrorMsg>: <Excerpt>" to `OS`.
  void log(llvm::raw_ostream &OS) const override {
    OS << "parse error at position (" << Pos << "): ";
    OS << ErrorMsg << ": " << Excerpt;
  }

  // Reports the "inconvertible" error code rather than a specific
  // std::error_code value.
  std::error_code convertToErrorCode() const override {
    return llvm::inconvertibleErrorCode();
  }

  // Position of the error in the input string.
  size_t Pos;
  // Human-readable description of what went wrong.
  std::string ErrorMsg;
  // Excerpt of the input starting at the error position.
  std::string Excerpt;
};
char ParseError::ID;
} // namespace
// Returns the lookup table, keyed by selector name, of selectors that take a
// single string argument. The table is built once, on first use.
static const llvm::StringMap<RangeSelectorOp<std::string>> &
getUnaryStringSelectors() {
  using SelectorTable = llvm::StringMap<RangeSelectorOp<std::string>>;
  static const SelectorTable Table = {
      {"name", name},
      {"node", node},
      {"statement", statement},
      {"statements", statements},
      {"member", member},
      {"callArgs", callArgs},
      {"elseBranch", elseBranch},
      {"initListElements", initListElements},
  };
  return Table;
}
// Returns the lookup table, keyed by selector name, of selectors that take a
// single `RangeSelector` argument. The table is built once, on first use.
static const llvm::StringMap<RangeSelectorOp<RangeSelector>> &
getUnaryRangeSelectors() {
  using SelectorTable = llvm::StringMap<RangeSelectorOp<RangeSelector>>;
  static const SelectorTable Table = {
      {"before", before},
      {"after", after},
      {"expansion", expansion},
  };
  return Table;
}
// Returns the lookup table, keyed by selector name, of selectors that take two
// string arguments. The table is built once, on first use.
static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> &
getBinaryStringSelectors() {
  using SelectorTable =
      llvm::StringMap<RangeSelectorOp<std::string, std::string>>;
  static const SelectorTable Table = {
      {"encloseNodes", encloseNodes},
  };
  return Table;
}
// Returns the lookup table, keyed by selector name, of selectors that take two
// `RangeSelector` arguments. The table is built once, on first use.
static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>> &
getBinaryRangeSelectors() {
  using SelectorTable =
      llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>>;
  static const SelectorTable Table = {
      {"enclose", enclose},
      {"between", between},
  };
  return Table;
}
// Looks up `Key` in `Map`. Returns a copy of the mapped value when the key is
// present and `llvm::None` otherwise.
template <typename Element>
llvm::Optional<Element> findOptional(const llvm::StringMap<Element> &Map,
                                     llvm::StringRef Key) {
  auto Entry = Map.find(Key);
  if (Entry != Map.end())
    return Entry->second;
  return llvm::None;
}
// Bundles a parser state and the value produced so far into a ParseProgress.
// `Result` is moved into the returned object.
template <typename ResultType>
ParseProgress<ResultType> makeParseProgress(ParseState State,
                                            ResultType Result) {
  return {State, std::move(Result)};
}
// Builds a ParseError for a failure at the current position of `S`. The
// position is the number of characters of the original input that are no
// longer in `S.Input`; the excerpt is at most 20 characters of the original
// input starting at that position.
static llvm::Error makeParseError(const ParseState &S, std::string ErrorMsg) {
  const size_t Consumed = S.OriginalInput.size() - S.Input.size();
  std::string Excerpt = S.OriginalInput.substr(Consumed, 20).str();
  return llvm::make_error<ParseError>(Consumed, std::move(ErrorMsg),
                                      std::move(Excerpt));
}
// Produces the state reached from \c S after consuming \c N characters of the
// remaining input. The original input is carried over unchanged.
static ParseState advance(ParseState S, size_t N) {
  return ParseState{S.Input.drop_front(N), S.OriginalInput};
}
// Returns \c S with any leading run of ASCII whitespace characters removed.
static StringRef consumeWhitespace(StringRef S) {
  auto IsAsciiSpace = [](char C) { return isASCII(C) && isWhitespace(C); };
  return S.drop_while(IsAsciiSpace);
}
// Skips leading whitespace in \c State and then requires the next character to
// be \c c. On success the returned state is positioned just past that
// character; otherwise a parse error located after the skipped whitespace is
// returned.
static ExpectedProgress<llvm::NoneType> parseChar(char c, ParseState State) {
  State.Input = consumeWhitespace(State.Input);
  const bool Matches = !State.Input.empty() && State.Input.front() == c;
  if (Matches)
    return makeParseProgress(advance(State, 1), llvm::None);
  return makeParseError(State,
                        ("expected char not found: " + llvm::Twine(c)).str());
}
// Parses an identifier "token": skips preceding whitespace, then consumes the
// longest run of ASCII identifier-body characters. Fails if that run is empty.
static ExpectedProgress<std::string> parseId(ParseState State) {
  State.Input = consumeWhitespace(State.Input);
  auto IsIdChar = [](char C) { return isASCII(C) && isIdentifierBody(C); };
  StringRef Name = State.Input.take_while(IsIdChar);
  if (!Name.empty())
    return makeParseProgress(advance(State, Name.size()), Name.str());
  return makeParseError(State, "failed to parse name");
}
// Parses a double-quoted node id, skipping preceding whitespace. Node ids are
// written as strings for consistency with the AST matcher parser and C++ code.
// Escaping is not supported: the id ends at the first '"' after the opening
// quote. The returned value is the text between the quotes.
static ExpectedProgress<std::string> parseStringId(ParseState State) {
  State.Input = consumeWhitespace(State.Input);
  if (State.Input.empty())
    return makeParseError(State, "unexpected end of input");
  if (!State.Input.consume_front("\""))
    return makeParseError(
        State,
        "expecting string, but encountered other character or end of input");

  // `State.Input` now begins just past the opening quote; locate the closing
  // one.
  const size_t ClosingQuote = State.Input.find('"');
  if (ClosingQuote == StringRef::npos)
    return makeParseError(State, "unterminated string");

  std::string Id = State.Input.substr(0, ClosingQuote).str();
  // Advance past the trailing quote as well.
  return makeParseProgress(advance(State, ClosingQuote + 1), std::move(Id));
}
// Parses `( element )`, where the element is parsed by `ParseElement`. The
// parsed element is passed to `Op`, and the resulting selector is returned
// together with the state following the closing paren. The first failing step
// determines the returned error.
template <typename T>
ExpectedProgress<RangeSelector> parseSingle(ParseFunction<T> ParseElement,
                                            RangeSelectorOp<T> Op,
                                            ParseState State) {
  auto Open = parseChar('(', State);
  if (!Open)
    return Open.takeError();

  auto Arg = ParseElement(Open->State);
  if (!Arg)
    return Arg.takeError();

  auto Close = parseChar(')', Arg->State);
  if (!Close)
    return Close.takeError();

  return makeParseProgress(Close->State, Op(std::move(Arg->Value)));
}
// Parses `( element , element )`, where both elements are parsed by
// `ParseElement`. The two parsed elements are passed to `Op` in source order,
// and the resulting selector is returned together with the state following the
// closing paren. The first failing step determines the returned error.
template <typename T>
ExpectedProgress<RangeSelector> parsePair(ParseFunction<T> ParseElement,
                                          RangeSelectorOp<T, T> Op,
                                          ParseState State) {
  auto Open = parseChar('(', State);
  if (!Open)
    return Open.takeError();

  auto First = ParseElement(Open->State);
  if (!First)
    return First.takeError();

  auto Comma = parseChar(',', First->State);
  if (!Comma)
    return Comma.takeError();

  auto Second = ParseElement(Comma->State);
  if (!Second)
    return Second.takeError();

  auto Close = parseChar(')', Second->State);
  if (!Close)
    return Close.takeError();

  return makeParseProgress(
      Close->State, Op(std::move(First->Value), std::move(Second->Value)));
}
// Parses one range-selector expression of the form `name(arguments)`. The
// leading identifier is looked up in the selector tables in this order: unary
// string, unary range, binary string, binary range. The arguments are then
// parsed to match the table that contained the name; range-valued arguments
// are parsed recursively by this function. Returns the constructed selector
// with the state following it, or an error if the identifier cannot be parsed,
// names no known selector, or the arguments are malformed.
static ExpectedProgress<RangeSelector>
parseRangeSelectorImpl(ParseState State) {
  auto Name = parseId(State);
  if (!Name)
    return Name.takeError();

  std::string OpName = std::move(Name->Value);
  const ParseState &AfterName = Name->State;

  if (auto UnaryString = findOptional(getUnaryStringSelectors(), OpName))
    return parseSingle(parseStringId, *UnaryString, AfterName);

  if (auto UnaryRange = findOptional(getUnaryRangeSelectors(), OpName))
    return parseSingle(parseRangeSelectorImpl, *UnaryRange, AfterName);

  if (auto BinaryString = findOptional(getBinaryStringSelectors(), OpName))
    return parsePair(parseStringId, *BinaryString, AfterName);

  if (auto BinaryRange = findOptional(getBinaryRangeSelectors(), OpName))
    return parsePair(parseRangeSelectorImpl, *BinaryRange, AfterName);

  // The error is reported against the state on entry, i.e. before the name.
  return makeParseError(State, "unknown selector name: " + OpName);
}
// Parses `Input` as a single range-selector expression. Whitespace after the
// selector is permitted; any other trailing text is reported as an error.
// Returns the parsed selector on success and a parse error otherwise.
Expected<RangeSelector> transformer::parseRangeSelector(llvm::StringRef Input) {
  ParseState State = {Input, Input};
  ExpectedProgress<RangeSelector> Result = parseRangeSelectorImpl(State);
  if (!Result)
    return Result.takeError();

  State = Result->State;
  // Discard any potentially trailing whitespace.
  State.Input = consumeWhitespace(State.Input);
  if (!State.Input.empty())
    return makeParseError(State, "unexpected input after selector");

  // Move the selector out of the progress object instead of copying it, as the
  // other parse helpers do with their intermediate values.
  return std::move(Result->Value);
}