//===--- FileIndex.cpp - Indexes for files. ------------------------ C++-*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "FileIndex.h"
#include "CollectMacros.h"
#include "ParsedAST.h"
#include "SymbolCollector.h"
#include "index/CanonicalIncludes.h"
#include "index/Index.h"
#include "index/MemIndex.h"
#include "index/Merge.h"
#include "index/Ref.h"
#include "index/Relation.h"
#include "index/Serialization.h"
#include "index/Symbol.h"
#include "index/SymbolID.h"
#include "index/SymbolOrigin.h"
#include "index/dex/Dex.h"
#include "support/Logger.h"
#include "support/MemoryTree.h"
#include "support/Path.h"
#include "clang/AST/ASTContext.h"
#include "clang/Index/IndexingAction.h"
#include "clang/Index/IndexingOptions.h"
#include "clang/Lex/MacroInfo.h"
#include "clang/Lex/Preprocessor.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Error.h"
#include <algorithm>
#include <memory>
#include <tuple>
#include <utility>
#include <vector>

namespace clang {
namespace clangd {
namespace {

SlabTuple indexSymbols(ASTContext &AST, std::shared_ptr<Preprocessor> PP,
                       llvm::ArrayRef<Decl *> DeclsToIndex,
                       const MainFileMacros *MacroRefsToIndex,
                       const CanonicalIncludes &Includes, bool IsIndexMainAST,
                       llvm::StringRef Version, bool CollectMainFileRefs) {
  SymbolCollector::Options CollectorOpts;
  CollectorOpts.CollectIncludePath = true;
  CollectorOpts.Includes = &Includes;
  CollectorOpts.CountReferences = false;
  CollectorOpts.Origin = SymbolOrigin::Dynamic;
  CollectorOpts.CollectMainFileRefs = CollectMainFileRefs;

  index::IndexingOptions IndexOpts;
  // We only need declarations, because we don't count references.
  IndexOpts.SystemSymbolFilter =
      index::IndexingOptions::SystemSymbolFilterKind::DeclarationsOnly;
  IndexOpts.IndexFunctionLocals = false;
  if (IsIndexMainAST) {
    // We only collect refs when indexing main AST.
    CollectorOpts.RefFilter = RefKind::All;
    // Comments for main file can always be obtained from sema, do not store
    // them in the index.
    CollectorOpts.StoreAllDocumentation = false;
  } else {
    IndexOpts.IndexMacrosInPreprocessor = true;
    CollectorOpts.CollectMacro = true;
    CollectorOpts.StoreAllDocumentation = true;
  }

  SymbolCollector Collector(std::move(CollectorOpts));
  Collector.setPreprocessor(PP);
  if (MacroRefsToIndex)
    Collector.handleMacros(*MacroRefsToIndex);
  index::indexTopLevelDecls(AST, *PP, DeclsToIndex, Collector, IndexOpts);

  const auto &SM = AST.getSourceManager();
  const auto *MainFileEntry = SM.getFileEntryForID(SM.getMainFileID());
  std::string FileName =
      std::string(MainFileEntry ? MainFileEntry->getName() : "");

  auto Syms = Collector.takeSymbols();
  auto Refs = Collector.takeRefs();
  auto Relations = Collector.takeRelations();

  vlog("indexed {0} AST for {1} version {2}:\n"
       "  symbol slab: {3} symbols, {4} bytes\n"
       "  ref slab: {5} symbols, {6} refs, {7} bytes\n"
       "  relations slab: {8} relations, {9} bytes",
       IsIndexMainAST ? "file" : "preamble", FileName, Version, Syms.size(),
       Syms.bytes(), Refs.size(), Refs.numRefs(), Refs.bytes(),
       Relations.size(), Relations.bytes());
  return std::make_tuple(std::move(Syms), std::move(Refs),
                         std::move(Relations));
}

// We keep only the node "U" and its edges. Any node other than "U" will be
// empty in the resultant graph.
IncludeGraph getSubGraph(llvm::StringRef URI, const IncludeGraph &FullGraph) {
  IncludeGraph IG;

  auto Entry = IG.try_emplace(URI).first;
  auto &Node = Entry->getValue();
  Node = FullGraph.lookup(Entry->getKey());
  Node.URI = Entry->getKey();

  // URIs inside nodes must point into the keys of the same IncludeGraph.
  for (auto &Include : Node.DirectIncludes) {
    auto I = IG.try_emplace(Include).first;
    I->getValue().URI = I->getKey();
    Include = I->getKey();
  }
  return IG;
}
} // namespace

FileShardedIndex::FileShardedIndex(IndexFileIn Input)
    : Index(std::move(Input)) {
  // Used to build RelationSlabs.
  llvm::DenseMap<SymbolID, FileShard *> SymbolIDToFile;

  // Attribute each Symbol to both their declaration and definition locations.
  if (Index.Symbols) {
    for (const auto &S : *Index.Symbols) {
      auto It = Shards.try_emplace(S.CanonicalDeclaration.FileURI);
      It.first->getValue().Symbols.insert(&S);
      SymbolIDToFile[S.ID] = &It.first->getValue();
      // Only bother if definition file is different than declaration file.
      if (S.Definition &&
          S.Definition.FileURI != S.CanonicalDeclaration.FileURI) {
        auto It = Shards.try_emplace(S.Definition.FileURI);
        It.first->getValue().Symbols.insert(&S);
      }
    }
  }
  // Attribute references into each file they occured in.
  if (Index.Refs) {
    for (const auto &SymRefs : *Index.Refs) {
      for (const auto &R : SymRefs.second) {
        const auto It = Shards.try_emplace(R.Location.FileURI);
        It.first->getValue().Refs.insert(&R);
        RefToSymID[&R] = SymRefs.first;
      }
    }
  }
  // The Subject and/or Object shards might be part of multiple TUs. In
  // such cases there will be a race and the last TU to write the shard
  // will win and all the other relations will be lost. To avoid this,
  // we store relations in both shards. A race might still happen if the
  // same translation unit produces different relations under different
  // configurations, but that's something clangd doesn't handle in general.
  if (Index.Relations) {
    for (const auto &R : *Index.Relations) {
      // FIXME: RelationSlab shouldn't contain dangling relations.
      FileShard *SubjectFile = SymbolIDToFile.lookup(R.Subject);
      FileShard *ObjectFile = SymbolIDToFile.lookup(R.Object);
      if (SubjectFile)
        SubjectFile->Relations.insert(&R);
      if (ObjectFile && ObjectFile != SubjectFile)
        ObjectFile->Relations.insert(&R);
    }
  }
  // Store only the direct includes of a file in a shard.
  if (Index.Sources) {
    const auto &FullGraph = *Index.Sources;
    for (const auto &It : FullGraph) {
      auto ShardIt = Shards.try_emplace(It.first());
      ShardIt.first->getValue().IG = getSubGraph(It.first(), FullGraph);
    }
  }
}
std::vector<llvm::StringRef> FileShardedIndex::getAllSources() const {
  // It should be enough to construct a vector with {Shards.keys().begin(),
  // Shards.keys().end()} but MSVC fails to compile that.
  std::vector<PathRef> Result;
  Result.reserve(Shards.size());
  for (auto Key : Shards.keys())
    Result.push_back(Key);
  return Result;
}

llvm::Optional<IndexFileIn>
FileShardedIndex::getShard(llvm::StringRef Uri) const {
  auto It = Shards.find(Uri);
  if (It == Shards.end())
    return llvm::None;

  IndexFileIn IF;
  IF.Sources = It->getValue().IG;
  IF.Cmd = Index.Cmd;

  SymbolSlab::Builder SymB;
  for (const auto *S : It->getValue().Symbols)
    SymB.insert(*S);
  IF.Symbols = std::move(SymB).build();

  RefSlab::Builder RefB;
  for (const auto *Ref : It->getValue().Refs) {
    auto SID = RefToSymID.lookup(Ref);
    RefB.insert(SID, *Ref);
  }
  IF.Refs = std::move(RefB).build();

  RelationSlab::Builder RelB;
  for (const auto *Rel : It->getValue().Relations) {
    RelB.insert(*Rel);
  }
  IF.Relations = std::move(RelB).build();
  // Explicit move here is needed by some compilers.
  return std::move(IF);
}

SlabTuple indexMainDecls(ParsedAST &AST, bool CollectMainFileRefs) {
  return indexSymbols(
      AST.getASTContext(), AST.getPreprocessorPtr(),
      AST.getLocalTopLevelDecls(), &AST.getMacros(), AST.getCanonicalIncludes(),
      /*IsIndexMainAST=*/true, AST.version(), CollectMainFileRefs);
}

SlabTuple indexHeaderSymbols(llvm::StringRef Version, ASTContext &AST,
                             std::shared_ptr<Preprocessor> PP,
                             const CanonicalIncludes &Includes) {
  std::vector<Decl *> DeclsToIndex(
      AST.getTranslationUnitDecl()->decls().begin(),
      AST.getTranslationUnitDecl()->decls().end());
  return indexSymbols(AST, std::move(PP), DeclsToIndex,
                      /*MainFileMacros=*/nullptr, Includes,
                      /*IsIndexMainAST=*/false, Version,
                      /*CollectMainFileRefs=*/false);
}

void FileSymbols::update(llvm::StringRef Key,
                         std::unique_ptr<SymbolSlab> Symbols,
                         std::unique_ptr<RefSlab> Refs,
                         std::unique_ptr<RelationSlab> Relations,
                         bool CountReferences) {
  std::lock_guard<std::mutex> Lock(Mutex);
  ++Version;
  if (!Symbols)
    SymbolsSnapshot.erase(Key);
  else
    SymbolsSnapshot[Key] = std::move(Symbols);
  if (!Refs) {
    RefsSnapshot.erase(Key);
  } else {
    RefSlabAndCountReferences Item;
    Item.CountReferences = CountReferences;
    Item.Slab = std::move(Refs);
    RefsSnapshot[Key] = std::move(Item);
  }
  if (!Relations)
    RelationsSnapshot.erase(Key);
  else
    RelationsSnapshot[Key] = std::move(Relations);
}

std::unique_ptr<SymbolIndex>
FileSymbols::buildIndex(IndexType Type, DuplicateHandling DuplicateHandle,
                        size_t *Version) {
  std::vector<std::shared_ptr<SymbolSlab>> SymbolSlabs;
  std::vector<std::shared_ptr<RefSlab>> RefSlabs;
  std::vector<std::shared_ptr<RelationSlab>> RelationSlabs;
  std::vector<RefSlab *> MainFileRefs;
  {
    std::lock_guard<std::mutex> Lock(Mutex);
    for (const auto &FileAndSymbols : SymbolsSnapshot)
      SymbolSlabs.push_back(FileAndSymbols.second);
    for (const auto &FileAndRefs : RefsSnapshot) {
      RefSlabs.push_back(FileAndRefs.second.Slab);
      if (FileAndRefs.second.CountReferences)
        MainFileRefs.push_back(RefSlabs.back().get());
    }
    for (const auto &FileAndRelations : RelationsSnapshot)
      RelationSlabs.push_back(FileAndRelations.second);

    if (Version)
      *Version = this->Version;
  }
  std::vector<const Symbol *> AllSymbols;
  std::vector<Symbol> SymsStorage;
  switch (DuplicateHandle) {
  case DuplicateHandling::Merge: {
    llvm::DenseMap<SymbolID, Symbol> Merged;
    for (const auto &Slab : SymbolSlabs) {
      for (const auto &Sym : *Slab) {
        assert(Sym.References == 0 &&
               "Symbol with non-zero references sent to FileSymbols");
        auto I = Merged.try_emplace(Sym.ID, Sym);
        if (!I.second)
          I.first->second = mergeSymbol(I.first->second, Sym);
      }
    }
    for (const RefSlab *Refs : MainFileRefs)
      for (const auto &Sym : *Refs) {
        auto It = Merged.find(Sym.first);
        // This might happen while background-index is still running.
        if (It == Merged.end())
          continue;
        It->getSecond().References += Sym.second.size();
      }
    SymsStorage.reserve(Merged.size());
    for (auto &Sym : Merged) {
      SymsStorage.push_back(std::move(Sym.second));
      AllSymbols.push_back(&SymsStorage.back());
    }
    break;
  }
  case DuplicateHandling::PickOne: {
    llvm::DenseSet<SymbolID> AddedSymbols;
    for (const auto &Slab : SymbolSlabs)
      for (const auto &Sym : *Slab) {
        assert(Sym.References == 0 &&
               "Symbol with non-zero references sent to FileSymbols");
        if (AddedSymbols.insert(Sym.ID).second)
          AllSymbols.push_back(&Sym);
      }
    break;
  }
  }

  std::vector<Ref> RefsStorage; // Contiguous ranges for each SymbolID.
  llvm::DenseMap<SymbolID, llvm::ArrayRef<Ref>> AllRefs;
  {
    llvm::DenseMap<SymbolID, llvm::SmallVector<Ref, 4>> MergedRefs;
    size_t Count = 0;
    for (const auto &RefSlab : RefSlabs)
      for (const auto &Sym : *RefSlab) {
        MergedRefs[Sym.first].append(Sym.second.begin(), Sym.second.end());
        Count += Sym.second.size();
      }
    RefsStorage.reserve(Count);
    AllRefs.reserve(MergedRefs.size());
    for (auto &Sym : MergedRefs) {
      auto &SymRefs = Sym.second;
      // Sorting isn't required, but yields more stable results over rebuilds.
      llvm::sort(SymRefs);
      llvm::copy(SymRefs, back_inserter(RefsStorage));
      AllRefs.try_emplace(
          Sym.first,
          llvm::ArrayRef<Ref>(&RefsStorage[RefsStorage.size() - SymRefs.size()],
                              SymRefs.size()));
    }
  }

  std::vector<Relation> AllRelations;
  for (const auto &RelationSlab : RelationSlabs) {
    for (const auto &R : *RelationSlab)
      AllRelations.push_back(R);
  }
  // Sort relations and remove duplicates that could arise due to
  // relations being stored in both the shards containing their
  // subject and object.
  llvm::sort(AllRelations);
  AllRelations.erase(std::unique(AllRelations.begin(), AllRelations.end()),
                     AllRelations.end());

  size_t StorageSize =
      RefsStorage.size() * sizeof(Ref) + SymsStorage.size() * sizeof(Symbol);
  for (const auto &Slab : SymbolSlabs)
    StorageSize += Slab->bytes();
  for (const auto &RefSlab : RefSlabs)
    StorageSize += RefSlab->bytes();

  // Index must keep the slabs and contiguous ranges alive.
  switch (Type) {
  case IndexType::Light:
    return std::make_unique<MemIndex>(
        llvm::make_pointee_range(AllSymbols), std::move(AllRefs),
        std::move(AllRelations),
        std::make_tuple(std::move(SymbolSlabs), std::move(RefSlabs),
                        std::move(RefsStorage), std::move(SymsStorage)),
        StorageSize);
  case IndexType::Heavy:
    return std::make_unique<dex::Dex>(
        llvm::make_pointee_range(AllSymbols), std::move(AllRefs),
        std::move(AllRelations),
        std::make_tuple(std::move(SymbolSlabs), std::move(RefSlabs),
                        std::move(RefsStorage), std::move(SymsStorage)),
        StorageSize);
  }
  llvm_unreachable("Unknown clangd::IndexType");
}

void FileSymbols::profile(MemoryTree &MT) const {
  std::lock_guard<std::mutex> Lock(Mutex);
  for (const auto &SymSlab : SymbolsSnapshot) {
    MT.detail(SymSlab.first())
        .child("symbols")
        .addUsage(SymSlab.second->bytes());
  }
  for (const auto &RefSlab : RefsSnapshot) {
    MT.detail(RefSlab.first())
        .child("references")
        .addUsage(RefSlab.second.Slab->bytes());
  }
  for (const auto &RelSlab : RelationsSnapshot) {
    MT.detail(RelSlab.first())
        .child("relations")
        .addUsage(RelSlab.second->bytes());
  }
}

FileIndex::FileIndex(bool UseDex, bool CollectMainFileRefs)
    : MergedIndex(&MainFileIndex, &PreambleIndex), UseDex(UseDex),
      CollectMainFileRefs(CollectMainFileRefs),
      PreambleIndex(std::make_unique<MemIndex>()),
      MainFileIndex(std::make_unique<MemIndex>()) {}

void FileIndex::updatePreamble(PathRef Path, llvm::StringRef Version,
                               ASTContext &AST,
                               std::shared_ptr<Preprocessor> PP,
                               const CanonicalIncludes &Includes) {
  IndexFileIn IF;
  std::tie(IF.Symbols, std::ignore, IF.Relations) =
      indexHeaderSymbols(Version, AST, std::move(PP), Includes);
  FileShardedIndex ShardedIndex(std::move(IF));
  for (auto Uri : ShardedIndex.getAllSources()) {
    auto IF = ShardedIndex.getShard(Uri);
    // We are using the key received from ShardedIndex, so it should always
    // exist.
    assert(IF);
    PreambleSymbols.update(
        Uri, std::make_unique<SymbolSlab>(std::move(*IF->Symbols)),
        std::make_unique<RefSlab>(),
        std::make_unique<RelationSlab>(std::move(*IF->Relations)),
        /*CountReferences=*/false);
  }
  size_t IndexVersion = 0;
  auto NewIndex =
      PreambleSymbols.buildIndex(UseDex ? IndexType::Heavy : IndexType::Light,
                                 DuplicateHandling::PickOne, &IndexVersion);
  {
    std::lock_guard<std::mutex> Lock(UpdateIndexMu);
    if (IndexVersion <= PreambleIndexVersion) {
      // We lost the race, some other thread built a later version.
      return;
    }
    PreambleIndexVersion = IndexVersion;
    PreambleIndex.reset(std::move(NewIndex));
    vlog(
        "Build dynamic index for header symbols with estimated memory usage of "
        "{0} bytes",
        PreambleIndex.estimateMemoryUsage());
  }
}

void FileIndex::updateMain(PathRef Path, ParsedAST &AST) {
  auto Contents = indexMainDecls(AST, CollectMainFileRefs);
  MainFileSymbols.update(
      Path, std::make_unique<SymbolSlab>(std::move(std::get<0>(Contents))),
      std::make_unique<RefSlab>(std::move(std::get<1>(Contents))),
      std::make_unique<RelationSlab>(std::move(std::get<2>(Contents))),
      /*CountReferences=*/true);
  size_t IndexVersion = 0;
  auto NewIndex = MainFileSymbols.buildIndex(
      IndexType::Light, DuplicateHandling::Merge, &IndexVersion);
  {
    std::lock_guard<std::mutex> Lock(UpdateIndexMu);
    if (IndexVersion <= MainIndexVersion) {
      // We lost the race, some other thread built a later version.
      return;
    }
    MainIndexVersion = IndexVersion;
    MainFileIndex.reset(std::move(NewIndex));
    vlog(
        "Build dynamic index for main-file symbols with estimated memory usage "
        "of {0} bytes",
        MainFileIndex.estimateMemoryUsage());
  }
}

void FileIndex::profile(MemoryTree &MT) const {
  PreambleSymbols.profile(MT.child("preamble").child("slabs"));
  MT.child("preamble")
      .child("index")
      .addUsage(PreambleIndex.estimateMemoryUsage());
  MainFileSymbols.profile(MT.child("main_file").child("slabs"));
  MT.child("main_file")
      .child("index")
      .addUsage(MainFileIndex.estimateMemoryUsage());
}
} // namespace clangd
} // namespace clang