You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

230 lines
7.2 KiB

/*
* Copyright (C) 2016 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TNT_UTILS_PROFILER_H
#define TNT_UTILS_PROFILER_H
#include <assert.h>
#include <stdint.h>
#include <array>
#include <chrono>
#if defined(__linux__)
# include <unistd.h>
# include <sys/ioctl.h>
# include <linux/perf_event.h>
#endif
namespace utils {
class Profiler {
enum {
INSTRUCTIONS = 0, // must be zero
CPU_CYCLES = 1,
DCACHE_REFS = 2,
DCACHE_MISSES = 3,
BRANCHES = 4,
BRANCH_MISSES = 5,
ICACHE_REFS = 6,
ICACHE_MISSES = 7,
// Must be last one
EVENT_COUNT
};
public:
enum {
EV_CPU_CYCLES = 1 << CPU_CYCLES,
EV_L1D_REFS = 1 << DCACHE_REFS,
EV_L1D_MISSES = 1 << DCACHE_MISSES,
EV_BPU_REFS = 1 << BRANCHES,
EV_BPU_MISSES = 1 << BRANCH_MISSES,
EV_L1I_REFS = 1 << ICACHE_REFS,
EV_L1I_MISSES = 1 << ICACHE_MISSES,
// helpers
EV_L1D_RATES = EV_L1D_REFS | EV_L1D_MISSES,
EV_L1I_RATES = EV_L1I_REFS | EV_L1I_MISSES,
EV_BPU_RATES = EV_BPU_REFS | EV_BPU_MISSES,
};
static Profiler& get() noexcept;
Profiler(const Profiler& rhs) = delete;
Profiler(Profiler&& rhs) = delete;
Profiler& operator=(const Profiler& rhs) = delete;
Profiler& operator=(Profiler&& rhs) = delete;
// selects which events are enabled.
// By Default: EV_CPU_CYCLES | EV_L1D_RATES | EV_BPU_RATES
uint32_t resetEvents(uint32_t eventMask) noexcept;
uint32_t getEnabledEvents() const noexcept { return mEnabledEvents; }
// could return false if performance counters are not supported/enabled
bool isValid() const { return mCountersFd[0] >= 0; }
class Counters {
friend class Profiler;
uint64_t nr;
uint64_t time_enabled;
uint64_t time_running;
struct {
uint64_t value;
uint64_t id;
} counters[Profiler::EVENT_COUNT];
friend Counters operator-(Counters lhs, const Counters& rhs) noexcept {
lhs.nr -= rhs.nr;
lhs.time_enabled -= rhs.time_enabled;
lhs.time_running -= rhs.time_running;
for (size_t i=0 ; i<EVENT_COUNT ; ++i) {
lhs.counters[i].value -= rhs.counters[i].value;
}
return lhs;
}
public:
uint64_t getInstructions() const { return counters[INSTRUCTIONS].value; }
uint64_t getCpuCycles() const { return counters[CPU_CYCLES].value; }
uint64_t getL1DReferences() const { return counters[DCACHE_REFS].value; }
uint64_t getL1DMisses() const { return counters[DCACHE_MISSES].value; }
uint64_t getL1IReferences() const { return counters[ICACHE_REFS].value; }
uint64_t getL1IMisses() const { return counters[ICACHE_MISSES].value; }
uint64_t getBranchInstructions() const { return counters[BRANCHES].value; }
uint64_t getBranchMisses() const { return counters[BRANCH_MISSES].value; }
std::chrono::duration<uint64_t, std::nano> getWallTime() const {
return std::chrono::duration<uint64_t, std::nano>(time_enabled);
}
std::chrono::duration<uint64_t, std::nano> getRunningTime() const {
return std::chrono::duration<uint64_t, std::nano>(time_running);
}
double getIPC() const noexcept {
uint64_t cpuCycles = getCpuCycles();
uint64_t instructions = getInstructions();
return double(instructions) / double(cpuCycles);
}
double getCPI() const noexcept {
uint64_t cpuCycles = getCpuCycles();
uint64_t instructions = getInstructions();
return double(cpuCycles) / double(instructions);
}
double getL1DMissRate() const noexcept {
uint64_t cacheReferences = getL1DReferences();
uint64_t cacheMisses = getL1DMisses();
return double(cacheMisses) / double(cacheReferences);
}
double getL1DHitRate() const noexcept {
return 1.0 - getL1DMissRate();
}
double getL1IMissRate() const noexcept {
uint64_t cacheReferences = getL1IReferences();
uint64_t cacheMisses = getL1IMisses();
return double(cacheMisses) / double(cacheReferences);
}
double getL1IHitRate() const noexcept {
return 1.0 - getL1IMissRate();
}
double getBranchMissRate() const noexcept {
uint64_t branchReferences = getBranchInstructions();
uint64_t branchMisses = getBranchMisses();
return double(branchMisses) / double(branchReferences);
}
double getBranchHitRate() const noexcept {
return 1.0 - getBranchMissRate();
}
double getMPKI(uint64_t misses) const noexcept {
return (misses * 1000.0) / getInstructions();
}
};
#if defined(__linux__)
void reset() noexcept {
int fd = mCountersFd[0];
ioctl(fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP);
}
void start() noexcept {
int fd = mCountersFd[0];
ioctl(fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP);
}
void stop() noexcept {
int fd = mCountersFd[0];
ioctl(fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP);
}
void readCounters(Counters* outCounters) noexcept {
Counters counters;
ssize_t n = read(mCountersFd[0], &counters, sizeof(Counters));
memset(outCounters, 0, sizeof(Counters));
if (n > 0) {
outCounters->nr = counters.nr;
outCounters->time_enabled = counters.time_enabled;
outCounters->time_running = counters.time_running;
for (size_t i=0 ; i<size_t(EVENT_COUNT) ; i++) {
if (mCountersFd[i] >= 0) {
outCounters->counters[i] = counters.counters[mIds[i]];
}
}
}
}
#else // !__linux__
void reset() noexcept { }
void start() noexcept { }
void stop() noexcept { }
void readCounters(Counters* counters) noexcept { }
#endif // __linux__
bool hasBranchRates() const noexcept {
return (mCountersFd[BRANCHES] >= 0) && (mCountersFd[BRANCH_MISSES] >= 0);
}
bool hasICacheRates() const noexcept {
return (mCountersFd[ICACHE_REFS] >= 0) && (mCountersFd[ICACHE_MISSES] >= 0);
}
private:
Profiler() noexcept;
~Profiler() noexcept;
std::array<uint8_t, EVENT_COUNT> mIds;
std::array<int, EVENT_COUNT> mCountersFd;
uint32_t mEnabledEvents = 0;
};
} // namespace utils
#endif // TNT_UTILS_PROFILER_H