You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
201 lines
6.4 KiB
201 lines
6.4 KiB
/*
|
|
* Copyright (C) 2016 The Android Open Source Project
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
#include "Profiler.h"
|
|
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <unistd.h>
|
|
|
|
#include <algorithm>
|
|
#include <iostream>
|
|
|
|
#if defined(__linux__)
|
|
|
|
#include <sys/syscall.h>
|
|
|
|
#ifdef __ARM_ARCH
|
|
// Event numbers handed to perf_event_open() as perf_event_attr::config.
// In this file the L1 instruction-cache entries are used together with
// PERF_TYPE_RAW; the L2 entries are declared but not referenced here.
enum ARMv8PmuPerfTypes {
    // Common micro-architecture events
    ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL = 0x01,
    ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS = 0x14,
    ARMV8_PMUV3_PERFCTR_L2_CACHE_ACCESS  = 0x16,
    ARMV8_PMUV3_PERFCTR_L2_CACHE_REFILL  = 0x17,
    ARMV8_PMUV3_PERFCTR_L2_CACHE_WB      = 0x18,
};
|
|
#endif
|
|
|
|
// Thin wrapper that issues the perf_event_open system call through syscall().
// All arguments are forwarded unchanged; the syscall's result is returned
// narrowed to int.
static int perf_event_open(struct perf_event_attr* hw_event, pid_t pid,
        int cpu, int group_fd, unsigned long flags) {
    const long result = syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags);
    return static_cast<int>(result);
}
|
|
|
|
#endif // __linux__
|
|
|
|
namespace utils {
|
|
|
|
// Returns a reference to the single shared Profiler. The instance is a
// function-local static, so it is constructed the first time get() is called.
Profiler& Profiler::get() noexcept {
    static Profiler instance;
    return instance;
}
|
|
|
|
// Marks every counter slot as closed, then opens the default event set
// (EV_CPU_CYCLES | EV_L1D_RATES | EV_BPU_RATES).
Profiler::Profiler() noexcept {
    // -1 is the "not open" sentinel tested by the destructor and resetEvents().
    // The slots are already-constructed ints, so std::fill (from <algorithm>)
    // is used rather than std::uninitialized_fill, which targets raw storage
    // and lives in <memory>.
    std::fill(mCountersFd.begin(), mCountersFd.end(), -1);
    Profiler::resetEvents(EV_CPU_CYCLES | EV_L1D_RATES | EV_BPU_RATES);
}
|
|
|
|
// Closes every counter file descriptor that is still open.
Profiler::~Profiler() noexcept {
    for (const int counterFd : mCountersFd) {
        if (counterFd < 0) {
            // negative value: this slot holds no open descriptor
            continue;
        }
        close(counterFd);
    }
}
|
|
|
|
uint32_t Profiler::resetEvents(uint32_t eventMask) noexcept {
|
|
// close all counters
|
|
for (int& fd : mCountersFd) {
|
|
if (fd >= 0) {
|
|
close(fd);
|
|
fd = -1;
|
|
}
|
|
}
|
|
mEnabledEvents = 0;
|
|
|
|
#if defined(__linux__)
|
|
|
|
struct perf_event_attr pe;
|
|
memset(&pe, 0, sizeof(struct perf_event_attr));
|
|
pe.type = PERF_TYPE_HARDWARE;
|
|
pe.size = sizeof(struct perf_event_attr);
|
|
pe.config = PERF_COUNT_HW_INSTRUCTIONS;
|
|
pe.disabled = 1;
|
|
pe.exclude_kernel = 1;
|
|
pe.exclude_hv = 1;
|
|
pe.read_format = PERF_FORMAT_GROUP |
|
|
PERF_FORMAT_ID |
|
|
PERF_FORMAT_TOTAL_TIME_ENABLED |
|
|
PERF_FORMAT_TOTAL_TIME_RUNNING;
|
|
|
|
uint8_t count = 0;
|
|
int fd = perf_event_open(&pe, 0, -1, -1, 0);
|
|
if (fd >= 0) {
|
|
const int groupFd = fd;
|
|
mIds[INSTRUCTIONS] = count++;
|
|
mCountersFd[INSTRUCTIONS] = fd;
|
|
|
|
pe.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID;
|
|
|
|
if (eventMask & EV_CPU_CYCLES) {
|
|
pe.type = PERF_TYPE_HARDWARE;
|
|
pe.config = PERF_COUNT_HW_CPU_CYCLES;
|
|
mCountersFd[CPU_CYCLES] = perf_event_open(&pe, 0, -1, groupFd, 0);
|
|
if (mCountersFd[CPU_CYCLES] > 0) {
|
|
mIds[CPU_CYCLES] = count++;
|
|
mEnabledEvents |= EV_CPU_CYCLES;
|
|
}
|
|
}
|
|
|
|
if (eventMask & EV_L1D_REFS) {
|
|
pe.type = PERF_TYPE_HARDWARE;
|
|
pe.config = PERF_COUNT_HW_CACHE_REFERENCES;
|
|
mCountersFd[DCACHE_REFS] = perf_event_open(&pe, 0, -1, groupFd, 0);
|
|
if (mCountersFd[DCACHE_REFS] > 0) {
|
|
mIds[DCACHE_REFS] = count++;
|
|
mEnabledEvents |= EV_L1D_REFS;
|
|
}
|
|
}
|
|
|
|
if (eventMask & EV_L1D_MISSES) {
|
|
pe.type = PERF_TYPE_HARDWARE;
|
|
pe.config = PERF_COUNT_HW_CACHE_MISSES;
|
|
mCountersFd[DCACHE_MISSES] = perf_event_open(&pe, 0, -1, groupFd, 0);
|
|
if (mCountersFd[DCACHE_MISSES] > 0) {
|
|
mIds[DCACHE_MISSES] = count++;
|
|
mEnabledEvents |= EV_L1D_MISSES;
|
|
}
|
|
}
|
|
|
|
if (eventMask & EV_BPU_REFS) {
|
|
pe.type = PERF_TYPE_HARDWARE;
|
|
pe.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
|
|
mCountersFd[BRANCHES] = perf_event_open(&pe, 0, -1, groupFd, 0);
|
|
if (mCountersFd[BRANCHES] > 0) {
|
|
mIds[BRANCHES] = count++;
|
|
mEnabledEvents |= EV_BPU_REFS;
|
|
}
|
|
}
|
|
|
|
if (eventMask & EV_BPU_MISSES) {
|
|
pe.type = PERF_TYPE_HARDWARE;
|
|
pe.config = PERF_COUNT_HW_BRANCH_MISSES;
|
|
mCountersFd[BRANCH_MISSES] = perf_event_open(&pe, 0, -1, groupFd, 0);
|
|
if (mCountersFd[BRANCH_MISSES] > 0) {
|
|
mIds[BRANCH_MISSES] = count++;
|
|
mEnabledEvents |= EV_BPU_MISSES;
|
|
}
|
|
}
|
|
|
|
#ifdef __ARM_ARCH
|
|
if (eventMask & EV_L1I_REFS) {
|
|
pe.type = PERF_TYPE_RAW;
|
|
pe.config = ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS;
|
|
mCountersFd[ICACHE_REFS] = perf_event_open(&pe, 0, -1, groupFd, 0);
|
|
if (mCountersFd[ICACHE_REFS] > 0) {
|
|
mIds[ICACHE_REFS] = count++;
|
|
mEnabledEvents |= EV_L1I_REFS;
|
|
}
|
|
}
|
|
|
|
if (eventMask & EV_L1I_MISSES) {
|
|
pe.type = PERF_TYPE_RAW;
|
|
pe.config = ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL;
|
|
mCountersFd[ICACHE_MISSES] = perf_event_open(&pe, 0, -1, groupFd, 0);
|
|
if (mCountersFd[ICACHE_MISSES] > 0) {
|
|
mIds[ICACHE_MISSES] = count++;
|
|
mEnabledEvents |= EV_L1I_MISSES;
|
|
}
|
|
}
|
|
#else
|
|
if (eventMask & EV_L1I_REFS) {
|
|
pe.type = PERF_TYPE_HW_CACHE;
|
|
pe.config = PERF_COUNT_HW_CACHE_L1I |
|
|
(PERF_COUNT_HW_CACHE_OP_READ<<8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS<<16);
|
|
mCountersFd[ICACHE_REFS] = perf_event_open(&pe, 0, -1, groupFd, 0);
|
|
if (mCountersFd[ICACHE_REFS] > 0) {
|
|
mIds[ICACHE_REFS] = count++;
|
|
mEnabledEvents |= EV_L1I_REFS;
|
|
}
|
|
}
|
|
|
|
if (eventMask & EV_L1I_MISSES) {
|
|
pe.type = PERF_TYPE_HW_CACHE;
|
|
pe.config = PERF_COUNT_HW_CACHE_L1I |
|
|
(PERF_COUNT_HW_CACHE_OP_READ<<8) | (PERF_COUNT_HW_CACHE_RESULT_MISS<<16);
|
|
mCountersFd[ICACHE_MISSES] = perf_event_open(&pe, 0, -1, groupFd, 0);
|
|
if (mCountersFd[ICACHE_MISSES] > 0) {
|
|
mIds[ICACHE_MISSES] = count++;
|
|
mEnabledEvents |= EV_L1I_MISSES;
|
|
}
|
|
}
|
|
#endif
|
|
}
|
|
#endif // __linux__
|
|
return mEnabledEvents;
|
|
}
|
|
|
|
} // namespace utils
|