You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
170 lines
5.2 KiB
170 lines
5.2 KiB
//===-- Clustering.h --------------------------------------------*- C++ -*-===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
///
|
|
/// \file
|
|
/// Utilities to compute benchmark result clusters.
|
|
///
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#ifndef LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H
|
|
#define LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H
|
|
|
|
#include "BenchmarkResult.h"
|
|
#include "llvm/ADT/Optional.h"
|
|
#include "llvm/Support/Error.h"
|
|
#include <limits>
|
|
#include <vector>
|
|
|
|
namespace llvm {
|
|
namespace exegesis {
|
|
|
|
class InstructionBenchmarkClustering {
|
|
public:
|
|
enum ModeE { Dbscan, Naive };
|
|
|
|
// Clusters `Points` using DBSCAN with the given parameters. See the cc file
|
|
// for more explanations on the algorithm.
|
|
static Expected<InstructionBenchmarkClustering>
|
|
create(const std::vector<InstructionBenchmark> &Points, ModeE Mode,
|
|
size_t DbscanMinPts, double AnalysisClusteringEpsilon,
|
|
Optional<unsigned> NumOpcodes = None);
|
|
|
|
class ClusterId {
|
|
public:
|
|
static ClusterId noise() { return ClusterId(kNoise); }
|
|
static ClusterId error() { return ClusterId(kError); }
|
|
static ClusterId makeValid(size_t Id, bool IsUnstable = false) {
|
|
return ClusterId(Id, IsUnstable);
|
|
}
|
|
static ClusterId makeValidUnstable(size_t Id) {
|
|
return makeValid(Id, /*IsUnstable=*/true);
|
|
}
|
|
|
|
ClusterId() : Id_(kUndef), IsUnstable_(false) {}
|
|
|
|
// Compare id's, ignoring the 'unstability' bit.
|
|
bool operator==(const ClusterId &O) const { return Id_ == O.Id_; }
|
|
bool operator<(const ClusterId &O) const { return Id_ < O.Id_; }
|
|
|
|
bool isValid() const { return Id_ <= kMaxValid; }
|
|
bool isUnstable() const { return IsUnstable_; }
|
|
bool isNoise() const { return Id_ == kNoise; }
|
|
bool isError() const { return Id_ == kError; }
|
|
bool isUndef() const { return Id_ == kUndef; }
|
|
|
|
// Precondition: isValid().
|
|
size_t getId() const {
|
|
assert(isValid());
|
|
return Id_;
|
|
}
|
|
|
|
private:
|
|
ClusterId(size_t Id, bool IsUnstable = false)
|
|
: Id_(Id), IsUnstable_(IsUnstable) {}
|
|
|
|
static constexpr const size_t kMaxValid =
|
|
(std::numeric_limits<size_t>::max() >> 1) - 4;
|
|
static constexpr const size_t kNoise = kMaxValid + 1;
|
|
static constexpr const size_t kError = kMaxValid + 2;
|
|
static constexpr const size_t kUndef = kMaxValid + 3;
|
|
|
|
size_t Id_ : (std::numeric_limits<size_t>::digits - 1);
|
|
size_t IsUnstable_ : 1;
|
|
};
|
|
static_assert(sizeof(ClusterId) == sizeof(size_t), "should be a bit field.");
|
|
|
|
struct Cluster {
|
|
Cluster() = delete;
|
|
explicit Cluster(const ClusterId &Id) : Id(Id) {}
|
|
|
|
const ClusterId Id;
|
|
// Indices of benchmarks within the cluster.
|
|
std::vector<int> PointIndices;
|
|
};
|
|
|
|
ClusterId getClusterIdForPoint(size_t P) const {
|
|
return ClusterIdForPoint_[P];
|
|
}
|
|
|
|
const std::vector<InstructionBenchmark> &getPoints() const { return Points_; }
|
|
|
|
const Cluster &getCluster(ClusterId Id) const {
|
|
assert(!Id.isUndef() && "unlabeled cluster");
|
|
if (Id.isNoise()) {
|
|
return NoiseCluster_;
|
|
}
|
|
if (Id.isError()) {
|
|
return ErrorCluster_;
|
|
}
|
|
return Clusters_[Id.getId()];
|
|
}
|
|
|
|
const std::vector<Cluster> &getValidClusters() const { return Clusters_; }
|
|
|
|
// Returns true if the given point is within a distance Epsilon of each other.
|
|
bool isNeighbour(const std::vector<BenchmarkMeasure> &P,
|
|
const std::vector<BenchmarkMeasure> &Q,
|
|
const double EpsilonSquared_) const {
|
|
double DistanceSquared = 0.0;
|
|
for (size_t I = 0, E = P.size(); I < E; ++I) {
|
|
const auto Diff = P[I].PerInstructionValue - Q[I].PerInstructionValue;
|
|
DistanceSquared += Diff * Diff;
|
|
}
|
|
return DistanceSquared <= EpsilonSquared_;
|
|
}
|
|
|
|
private:
|
|
InstructionBenchmarkClustering(
|
|
const std::vector<InstructionBenchmark> &Points,
|
|
double AnalysisClusteringEpsilonSquared);
|
|
|
|
Error validateAndSetup();
|
|
|
|
void clusterizeDbScan(size_t MinPts);
|
|
void clusterizeNaive(unsigned NumOpcodes);
|
|
|
|
// Stabilization is only needed if dbscan was used to clusterize.
|
|
void stabilize(unsigned NumOpcodes);
|
|
|
|
void rangeQuery(size_t Q, std::vector<size_t> &Scratchpad) const;
|
|
|
|
bool areAllNeighbours(ArrayRef<size_t> Pts) const;
|
|
|
|
const std::vector<InstructionBenchmark> &Points_;
|
|
const double AnalysisClusteringEpsilonSquared_;
|
|
|
|
int NumDimensions_ = 0;
|
|
// ClusterForPoint_[P] is the cluster id for Points[P].
|
|
std::vector<ClusterId> ClusterIdForPoint_;
|
|
std::vector<Cluster> Clusters_;
|
|
Cluster NoiseCluster_;
|
|
Cluster ErrorCluster_;
|
|
};
|
|
|
|
class SchedClassClusterCentroid {
|
|
public:
|
|
const std::vector<PerInstructionStats> &getStats() const {
|
|
return Representative;
|
|
}
|
|
|
|
std::vector<BenchmarkMeasure> getAsPoint() const;
|
|
|
|
void addPoint(ArrayRef<BenchmarkMeasure> Point);
|
|
|
|
bool validate(InstructionBenchmark::ModeE Mode) const;
|
|
|
|
private:
|
|
// Measurement stats for the points in the SchedClassCluster.
|
|
std::vector<PerInstructionStats> Representative;
|
|
};
|
|
|
|
} // namespace exegesis
|
|
} // namespace llvm
|
|
|
|
#endif // LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H
|