You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
400 lines
13 KiB
400 lines
13 KiB
//===--- Dexp.cpp - Dex EXPloration tool ------------------------*- C++ -*-===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This file implements a simple interactive tool which can be used to manually
|
|
// evaluate symbol search quality of Clangd index.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "index/Serialization.h"
|
|
#include "index/dex/Dex.h"
|
|
#include "index/remote/Client.h"
|
|
#include "llvm/ADT/ScopeExit.h"
|
|
#include "llvm/ADT/SmallVector.h"
|
|
#include "llvm/ADT/StringRef.h"
|
|
#include "llvm/LineEditor/LineEditor.h"
|
|
#include "llvm/Support/CommandLine.h"
|
|
#include "llvm/Support/Signals.h"
|
|
|
|
namespace clang {
|
|
namespace clangd {
|
|
namespace {
|
|
|
|
// Positional argument naming the index to explore: either a path to an index
// file or a "remote:<address>" string.
llvm::cl::opt<std::string> IndexLocation(
    llvm::cl::Positional,
    llvm::cl::desc("<path to index file | remote:server.address>"));

// When non-empty, this single command is executed and the tool exits instead
// of entering the interactive prompt.
llvm::cl::opt<std::string>
    ExecCommand("c", llvm::cl::desc("Command to execute and then exit."));

// Project path handed to the remote index client; main() rejects remote mode
// when this is empty.
llvm::cl::opt<std::string> ProjectRoot(
    "project-root",
    llvm::cl::desc(
        "Path to the project. Required when connecting using remote index."));
|
|
|
|
// Overview text handed to the command-line parser in main(); it is shown as
// part of the tool's top-level help output.
static constexpr char Overview[] = R"(
This is an **experimental** interactive tool to process user-provided search
queries over given symbol collection obtained via clangd-indexer. The
tool can be used to evaluate search quality of existing index implementations
and manually construct non-trivial test cases.

You can connect to remote index by passing remote:address to dexp. Example:

$ dexp remote:0.0.0.0:9000

Type "help" to list the available commands.
)";
|
|
|
|
// Runs F and prints to stdout how long it took, in milliseconds, labelled
// with Name.
void reportTime(llvm::StringRef Name, llvm::function_ref<void()> F) {
  // steady_clock is monotonic, so the measured interval cannot be skewed (or
  // become negative) if the system clock is adjusted while F is running.
  using Clock = std::chrono::steady_clock;
  const auto TimerStart = Clock::now();
  F();
  const auto TimerStop = Clock::now();
  const auto Duration = std::chrono::duration_cast<std::chrono::milliseconds>(
      TimerStop - TimerStart);
  llvm::outs() << llvm::formatv("{0} took {1:ms+n}.\n", Name, Duration);
}
|
|
|
|
// Collects the IDs of every symbol reported by Index->fuzzyFind() whose
// scope + name is exactly QualifiedName. A leading "::" on QualifiedName is
// accepted and ignored.
std::vector<SymbolID> getSymbolIDsFromIndex(llvm::StringRef QualifiedName,
                                            const SymbolIndex *Index) {
  // FuzzyFind doesn't need a leading "::" qualifier for the global scope.
  QualifiedName.consume_front("::");
  const auto ScopeAndName = splitQualifiedName(QualifiedName);

  FuzzyFindRequest Request;
  // When the name carries no scope part (e.g. "GlobalSymbol" or
  // "::GlobalSymbol") the scope component is empty, and the empty string is
  // what the request uses for the global scope, so one assignment covers
  // both the qualified and the unqualified case.
  Request.Scopes = {std::string(ScopeAndName.first)};
  Request.Query = std::string(ScopeAndName.second);

  std::vector<SymbolID> Matches;
  Index->fuzzyFind(Request, [&](const Symbol &Candidate) {
    // Keep only candidates whose full qualified name matches exactly.
    if (QualifiedName == (Candidate.Scope + Candidate.Name).str())
      Matches.push_back(Candidate.ID);
  });
  return Matches;
}
|
|
|
|
// REPL commands inherit from Command and contain their options as members.
|
|
// Creating a Command populates parser options, parseAndRun() resets them.
|
|
class Command {
|
|
// By resetting the parser options, we lost the standard -help flag.
|
|
llvm::cl::opt<bool, false, llvm::cl::parser<bool>> Help{
|
|
"help", llvm::cl::desc("Display available options"),
|
|
llvm::cl::ValueDisallowed, llvm::cl::cat(llvm::cl::GeneralCategory)};
|
|
// FIXME: Allow commands to signal failure.
|
|
virtual void run() = 0;
|
|
|
|
protected:
|
|
const SymbolIndex *Index;
|
|
|
|
public:
|
|
virtual ~Command() = default;
|
|
bool parseAndRun(llvm::ArrayRef<const char *> Argv, const char *Overview,
|
|
const SymbolIndex &Index) {
|
|
std::string ParseErrs;
|
|
llvm::raw_string_ostream OS(ParseErrs);
|
|
bool Ok = llvm::cl::ParseCommandLineOptions(Argv.size(), Argv.data(),
|
|
Overview, &OS);
|
|
// must do this before opts are destroyed
|
|
auto Cleanup = llvm::make_scope_exit(llvm::cl::ResetCommandLineParser);
|
|
if (Help.getNumOccurrences() > 0) {
|
|
// Avoid printing parse errors in this case.
|
|
// (Well, in theory. A bunch get printed to llvm::errs() regardless!)
|
|
llvm::cl::PrintHelpMessage();
|
|
return true;
|
|
}
|
|
|
|
llvm::outs() << OS.str();
|
|
if (Ok) {
|
|
this->Index = &Index;
|
|
reportTime(Argv[0], [&] { run(); });
|
|
}
|
|
return Ok;
|
|
}
|
|
};
|
|
|
|
// FIXME(kbobyrev): Ideas for more commands:
|
|
// * load/swap/reload index: this would make it possible to get rid of llvm::cl
|
|
// usages in the tool driver and actually use llvm::cl library in the REPL.
|
|
// * show posting list density histogram (or dump data somewhere so that user
|
|
// could build one)
|
|
// * show number of tokens of each kind
|
|
// * print out tokens with the most dense posting lists
|
|
// * print out tokens with least dense posting lists
|
|
|
|
class FuzzyFind : public Command {
|
|
llvm::cl::opt<std::string> Query{
|
|
"query",
|
|
llvm::cl::Positional,
|
|
llvm::cl::Required,
|
|
llvm::cl::desc("Query string to be fuzzy-matched"),
|
|
};
|
|
llvm::cl::opt<std::string> Scopes{
|
|
"scopes",
|
|
llvm::cl::desc("Allowed symbol scopes (comma-separated list)"),
|
|
};
|
|
llvm::cl::opt<unsigned> Limit{
|
|
"limit",
|
|
llvm::cl::init(10),
|
|
llvm::cl::desc("Max results to display"),
|
|
};
|
|
|
|
void run() override {
|
|
FuzzyFindRequest Request;
|
|
Request.Limit = Limit;
|
|
Request.Query = Query;
|
|
if (Scopes.getNumOccurrences() > 0) {
|
|
llvm::SmallVector<llvm::StringRef, 8> Scopes;
|
|
llvm::StringRef(this->Scopes).split(Scopes, ',');
|
|
Request.Scopes = {Scopes.begin(), Scopes.end()};
|
|
}
|
|
Request.AnyScope = Request.Scopes.empty();
|
|
// FIXME(kbobyrev): Print symbol final scores to see the distribution.
|
|
static const auto *OutputFormat = "{0,-4} | {1,-40} | {2,-25}\n";
|
|
llvm::outs() << llvm::formatv(OutputFormat, "Rank", "Symbol ID",
|
|
"Symbol Name");
|
|
size_t Rank = 0;
|
|
Index->fuzzyFind(Request, [&](const Symbol &Sym) {
|
|
llvm::outs() << llvm::formatv(OutputFormat, Rank++, Sym.ID.str(),
|
|
Sym.Scope + Sym.Name);
|
|
});
|
|
}
|
|
};
|
|
|
|
class Lookup : public Command {
|
|
llvm::cl::opt<std::string> ID{
|
|
"id",
|
|
llvm::cl::Positional,
|
|
llvm::cl::desc("Symbol ID to look up (hex)"),
|
|
};
|
|
llvm::cl::opt<std::string> Name{
|
|
"name",
|
|
llvm::cl::desc("Qualified name to look up."),
|
|
};
|
|
|
|
void run() override {
|
|
if (ID.getNumOccurrences() == 0 && Name.getNumOccurrences() == 0) {
|
|
llvm::errs()
|
|
<< "Missing required argument: please provide id or -name.\n";
|
|
return;
|
|
}
|
|
std::vector<SymbolID> IDs;
|
|
if (ID.getNumOccurrences()) {
|
|
auto SID = SymbolID::fromStr(ID);
|
|
if (!SID) {
|
|
llvm::errs() << llvm::toString(SID.takeError()) << "\n";
|
|
return;
|
|
}
|
|
IDs.push_back(*SID);
|
|
} else {
|
|
IDs = getSymbolIDsFromIndex(Name, Index);
|
|
}
|
|
|
|
LookupRequest Request;
|
|
Request.IDs.insert(IDs.begin(), IDs.end());
|
|
bool FoundSymbol = false;
|
|
Index->lookup(Request, [&](const Symbol &Sym) {
|
|
FoundSymbol = true;
|
|
llvm::outs() << toYAML(Sym);
|
|
});
|
|
if (!FoundSymbol)
|
|
llvm::errs() << "not found\n";
|
|
}
|
|
};
|
|
|
|
class Refs : public Command {
|
|
llvm::cl::opt<std::string> ID{
|
|
"id",
|
|
llvm::cl::Positional,
|
|
llvm::cl::desc("Symbol ID of the symbol being queried (hex)."),
|
|
};
|
|
llvm::cl::opt<std::string> Name{
|
|
"name",
|
|
llvm::cl::desc("Qualified name of the symbol being queried."),
|
|
};
|
|
llvm::cl::opt<std::string> Filter{
|
|
"filter",
|
|
llvm::cl::init(".*"),
|
|
llvm::cl::desc(
|
|
"Print all results from files matching this regular expression."),
|
|
};
|
|
|
|
void run() override {
|
|
if (ID.getNumOccurrences() == 0 && Name.getNumOccurrences() == 0) {
|
|
llvm::errs()
|
|
<< "Missing required argument: please provide id or -name.\n";
|
|
return;
|
|
}
|
|
std::vector<SymbolID> IDs;
|
|
if (ID.getNumOccurrences()) {
|
|
auto SID = SymbolID::fromStr(ID);
|
|
if (!SID) {
|
|
llvm::errs() << llvm::toString(SID.takeError()) << "\n";
|
|
return;
|
|
}
|
|
IDs.push_back(*SID);
|
|
} else {
|
|
IDs = getSymbolIDsFromIndex(Name, Index);
|
|
if (IDs.size() > 1) {
|
|
llvm::errs() << llvm::formatv(
|
|
"The name {0} is ambiguous, found {1} different "
|
|
"symbols. Please use id flag to disambiguate.\n",
|
|
Name, IDs.size());
|
|
return;
|
|
}
|
|
}
|
|
RefsRequest RefRequest;
|
|
RefRequest.IDs.insert(IDs.begin(), IDs.end());
|
|
llvm::Regex RegexFilter(Filter);
|
|
Index->refs(RefRequest, [&RegexFilter](const Ref &R) {
|
|
auto U = URI::parse(R.Location.FileURI);
|
|
if (!U) {
|
|
llvm::errs() << U.takeError();
|
|
return;
|
|
}
|
|
if (RegexFilter.match(U->body()))
|
|
llvm::outs() << R << "\n";
|
|
});
|
|
}
|
|
};
|
|
|
|
class Export : public Command {
|
|
llvm::cl::opt<IndexFileFormat> Format{
|
|
"format",
|
|
llvm::cl::desc("Format of index export"),
|
|
llvm::cl::values(
|
|
clEnumValN(IndexFileFormat::YAML, "yaml",
|
|
"human-readable YAML format"),
|
|
clEnumValN(IndexFileFormat::RIFF, "binary", "binary RIFF format")),
|
|
llvm::cl::init(IndexFileFormat::YAML),
|
|
};
|
|
llvm::cl::opt<std::string> OutputFile{
|
|
"output-file",
|
|
llvm::cl::Positional,
|
|
llvm::cl::Required,
|
|
llvm::cl::desc("Output file for export"),
|
|
};
|
|
|
|
public:
|
|
void run() override {
|
|
using namespace clang::clangd;
|
|
// Read input file (as specified in global option)
|
|
auto Buffer = llvm::MemoryBuffer::getFile(IndexLocation);
|
|
if (!Buffer) {
|
|
llvm::errs() << llvm::formatv("Can't open {0}", IndexLocation) << "\n";
|
|
return;
|
|
}
|
|
|
|
// Auto-detects input format when parsing
|
|
auto IndexIn = clang::clangd::readIndexFile(Buffer->get()->getBuffer());
|
|
if (!IndexIn) {
|
|
llvm::errs() << llvm::toString(IndexIn.takeError()) << "\n";
|
|
return;
|
|
}
|
|
|
|
// Prepare output file
|
|
std::error_code EC;
|
|
llvm::raw_fd_ostream OutputStream(OutputFile, EC);
|
|
if (EC) {
|
|
llvm::errs() << llvm::formatv("Can't open {0} for writing", OutputFile)
|
|
<< "\n";
|
|
return;
|
|
}
|
|
|
|
// Export
|
|
clang::clangd::IndexFileOut IndexOut(IndexIn.get());
|
|
IndexOut.Format = Format;
|
|
OutputStream << IndexOut;
|
|
}
|
|
};
|
|
|
|
struct {
|
|
const char *Name;
|
|
const char *Description;
|
|
std::function<std::unique_ptr<Command>()> Implementation;
|
|
} CommandInfo[] = {
|
|
{"find", "Search for symbols with fuzzyFind", std::make_unique<FuzzyFind>},
|
|
{"lookup", "Dump symbol details by ID or qualified name",
|
|
std::make_unique<Lookup>},
|
|
{"refs", "Find references by ID or qualified name", std::make_unique<Refs>},
|
|
{"export", "Export index", std::make_unique<Export>},
|
|
};
|
|
|
|
// Opens the index described by Index: a "remote:<address>" string yields a
// remote index client for that address (using the global ProjectRoot option),
// anything else is treated as a path and loaded as a Dex index.
std::unique_ptr<SymbolIndex> openIndex(llvm::StringRef Index) {
  // consume_front() strips the prefix in place when it is present.
  if (Index.consume_front("remote:"))
    return remote::getClient(Index, ProjectRoot);
  return loadIndex(Index, /*UseDex=*/true);
}
|
|
|
|
bool runCommand(std::string Request, const SymbolIndex &Index) {
|
|
// Split on spaces and add required null-termination.
|
|
std::replace(Request.begin(), Request.end(), ' ', '\0');
|
|
llvm::SmallVector<llvm::StringRef, 8> Args;
|
|
llvm::StringRef(Request).split(Args, '\0', /*MaxSplit=*/-1,
|
|
/*KeepEmpty=*/false);
|
|
if (Args.empty())
|
|
return false;
|
|
if (Args.front() == "help") {
|
|
llvm::outs() << "dexp - Index explorer\nCommands:\n";
|
|
for (const auto &C : CommandInfo)
|
|
llvm::outs() << llvm::formatv("{0,16} - {1}\n", C.Name, C.Description);
|
|
llvm::outs() << "Get detailed command help with e.g. `find -help`.\n";
|
|
return true;
|
|
}
|
|
llvm::SmallVector<const char *, 8> FakeArgv;
|
|
for (llvm::StringRef S : Args)
|
|
FakeArgv.push_back(S.data()); // Terminated by separator or end of string.
|
|
|
|
for (const auto &Cmd : CommandInfo) {
|
|
if (Cmd.Name == Args.front())
|
|
return Cmd.Implementation()->parseAndRun(FakeArgv, Cmd.Description,
|
|
Index);
|
|
}
|
|
llvm::errs() << "Unknown command. Try 'help'.\n";
|
|
return false;
|
|
}
|
|
|
|
} // namespace
|
|
} // namespace clangd
|
|
} // namespace clang
|
|
|
|
int main(int argc, const char *argv[]) {
|
|
using namespace clang::clangd;
|
|
|
|
llvm::cl::ParseCommandLineOptions(argc, argv, Overview);
|
|
llvm::cl::ResetCommandLineParser(); // We reuse it for REPL commands.
|
|
llvm::sys::PrintStackTraceOnErrorSignal(argv[0]);
|
|
|
|
bool RemoteMode = llvm::StringRef(IndexLocation).startswith("remote:");
|
|
if (RemoteMode && ProjectRoot.empty()) {
|
|
llvm::errs() << "--project-root is required in remote mode\n";
|
|
return -1;
|
|
}
|
|
|
|
std::unique_ptr<SymbolIndex> Index;
|
|
reportTime(RemoteMode ? "Remote index client creation" : "Dex build",
|
|
[&]() { Index = openIndex(IndexLocation); });
|
|
|
|
if (!Index) {
|
|
llvm::errs() << "Failed to open the index.\n";
|
|
return -1;
|
|
}
|
|
|
|
if (!ExecCommand.empty())
|
|
return runCommand(ExecCommand, *Index) ? 0 : 1;
|
|
|
|
llvm::LineEditor LE("dexp");
|
|
while (llvm::Optional<std::string> Request = LE.readLine())
|
|
runCommand(std::move(*Request), *Index);
|
|
}
|