You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

201 lines
6.4 KiB

/*
* Copyright (C) 2016 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "Profiler.h"
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <algorithm>
#include <iostream>
#if defined(__linux__)
#include <sys/syscall.h>
#ifdef __ARM_ARCH
// Raw PMU event numbers for ARM builds. In this file the L1 I-cache entries are
// written to perf_event_attr::config together with PERF_TYPE_RAW; the L2 entries
// are declared but not referenced in the code visible here.
// NOTE(review): the values look like the ARMv8 PMUv3 common event numbers --
// confirm against the Arm Architecture Reference Manual before adding more.
enum ARMv8PmuPerfTypes{
// Common micro-architecture events
ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL = 0x01,    // used for EV_L1I_MISSES
ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS = 0x14,    // used for EV_L1I_REFS
ARMV8_PMUV3_PERFCTR_L2_CACHE_ACCESS = 0x16,
ARMV8_PMUV3_PERFCTR_L2_CACHE_REFILL = 0x17,
ARMV8_PMUV3_PERFCTR_L2_CACHE_WB = 0x18,
};
#endif
// Thin wrapper around the perf_event_open system call, which is invoked through
// syscall() with __NR_perf_event_open. All arguments are forwarded unchanged.
// Returns whatever syscall() returns, narrowed to int: callers in this file
// treat a non-negative value as an open file descriptor.
static int perf_event_open(struct perf_event_attr* hw_event, pid_t pid,
        int cpu, int group_fd, unsigned long flags) {
    const long result = syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags);
    return static_cast<int>(result);
}
#endif // __linux__
namespace utils {
// Returns the process-wide Profiler. The instance is a function-local static,
// so it is constructed the first time get() is called and the same object is
// handed back on every subsequent call.
Profiler& Profiler::get() noexcept {
    static Profiler instance;
    return instance;
}
// Constructs the profiler with every counter slot marked as closed, then tries
// to enable the default event set (CPU cycles, L1D rates, branch rates).
Profiler::Profiler() noexcept {
    // resetEvents() starts by closing every descriptor that is >= 0, so all
    // slots must read -1 before it runs. mCountersFd is an already-constructed
    // member, hence std::fill (<algorithm>) rather than std::uninitialized_fill,
    // which is a <memory> algorithm intended for raw, unconstructed storage.
    std::fill(mCountersFd.begin(), mCountersFd.end(), -1);
    Profiler::resetEvents(EV_CPU_CYCLES | EV_L1D_RATES | EV_BPU_RATES);
}
// Releases every counter file descriptor that is still open.
Profiler::~Profiler() noexcept {
    for (const int counterFd : mCountersFd) {
        if (counterFd < 0) {
            // negative entries mark slots that hold no open descriptor
            continue;
        }
        close(counterFd);
    }
}
// Closes all currently open counters and (on Linux) re-creates the perf counter
// group for the requested events.
//
// eventMask: bitwise OR of EV_* flags naming the events the caller wants.
//
// An instruction counter is always opened first and acts as the group leader;
// every requested event is then opened as a member of that group. Events whose
// counter cannot be opened are silently skipped.
//
// Returns the subset of eventMask that was actually enabled (also stored in
// mEnabledEvents). Returns 0 if the group leader could not be opened, and
// always 0 on non-Linux builds.
uint32_t Profiler::resetEvents(uint32_t eventMask) noexcept {
    // close all counters
    for (int& fd : mCountersFd) {
        if (fd >= 0) {
            close(fd);
            fd = -1;
        }
    }
    mEnabledEvents = 0;

#if defined(__linux__)

    struct perf_event_attr pe;
    memset(&pe, 0, sizeof(struct perf_event_attr));
    pe.type = PERF_TYPE_HARDWARE;
    pe.size = sizeof(struct perf_event_attr);
    pe.config = PERF_COUNT_HW_INSTRUCTIONS;
    pe.disabled = 1;        // counters are created stopped
    pe.exclude_kernel = 1;
    pe.exclude_hv = 1;
    pe.read_format = PERF_FORMAT_GROUP |
                     PERF_FORMAT_ID |
                     PERF_FORMAT_TOTAL_TIME_ENABLED |
                     PERF_FORMAT_TOTAL_TIME_RUNNING;

    // Position of each counter within the group, in the order they are opened.
    uint8_t count = 0;

    // pid = 0, cpu = -1, group_fd = -1: new group leader
    const int groupFd = perf_event_open(&pe, 0, -1, -1, 0);
    if (groupFd >= 0) {
        mIds[INSTRUCTIONS] = count++;
        mCountersFd[INSTRUCTIONS] = groupFd;

        // group members do not request the time-enabled/time-running fields
        pe.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID;

        // Opens one group member if `eventBit` was requested. The descriptor
        // (or the failure value) is stored in mCountersFd[index]; on success
        // the counter gets the next group position and is flagged as enabled.
        auto openCounter = [&](uint32_t eventBit, size_t index,
                uint32_t type, uint64_t config) {
            if (!(eventMask & eventBit)) {
                return;
            }
            pe.type = type;
            pe.config = config;
            const int fd = perf_event_open(&pe, 0, -1, groupFd, 0);
            mCountersFd[index] = fd;
            // 0 is a valid file descriptor, so success is fd >= 0 (this also
            // matches the checks used when closing counters).
            if (fd >= 0) {
                mIds[index] = count++;
                mEnabledEvents |= eventBit;
            }
        };

        // The call order below defines the group position of each counter.
        openCounter(EV_CPU_CYCLES, CPU_CYCLES,
                PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES);
        openCounter(EV_L1D_REFS, DCACHE_REFS,
                PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES);
        openCounter(EV_L1D_MISSES, DCACHE_MISSES,
                PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES);
        openCounter(EV_BPU_REFS, BRANCHES,
                PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
        openCounter(EV_BPU_MISSES, BRANCH_MISSES,
                PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_MISSES);

#ifdef __ARM_ARCH
        // On ARM the L1 I-cache events are requested as raw PMU event numbers.
        openCounter(EV_L1I_REFS, ICACHE_REFS,
                PERF_TYPE_RAW, ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS);
        openCounter(EV_L1I_MISSES, ICACHE_MISSES,
                PERF_TYPE_RAW, ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL);
#else
        // Elsewhere they use the generic cache event encoding:
        // cache id | (operation << 8) | (result << 16).
        openCounter(EV_L1I_REFS, ICACHE_REFS,
                PERF_TYPE_HW_CACHE,
                PERF_COUNT_HW_CACHE_L1I |
                (PERF_COUNT_HW_CACHE_OP_READ<<8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS<<16));
        openCounter(EV_L1I_MISSES, ICACHE_MISSES,
                PERF_TYPE_HW_CACHE,
                PERF_COUNT_HW_CACHE_L1I |
                (PERF_COUNT_HW_CACHE_OP_READ<<8) | (PERF_COUNT_HW_CACHE_RESULT_MISS<<16));
#endif
    }
#endif // __linux__
    return mEnabledEvents;
}
} // namespace utils