You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
125 lines
3.8 KiB
125 lines
3.8 KiB
#include <getopt.h>
#include <sched.h>
#include <stdio.h>
#include <sys/time.h>

#include <chrono>
#include <exception>
#include <iomanip>
#include <iostream>
#include <string>
#include <thread>

#include "Profiler.h"
|
|
|
|
// Benchmark kernel with C linkage; its definition is not part of this file.
// main() calls it with REPETITIONS as `count` and the current footprint, expressed in
// cache lines, as `step`.
// NOTE(review): presumably runs `step` cache lines worth of code `count` times -- confirm
// against the implementation.
extern "C" void icache_test(long count, long step);
|
|
|
|
// Largest code footprint covered by the sweep in main(), in bytes (128 KiB).
static constexpr size_t MAX_CODE_SIZE = 128*1024;
|
|
// Cache line size in bytes, used to convert between byte sizes and cache-line counts.
// NOTE(review): hard-coded to 64 -- confirm it matches the CPUs this tool targets.
static constexpr size_t CACHE_LINE_SIZE = 64;
|
|
// MAX_CODE_SIZE expressed in cache lines; upper bound (inclusive) of the sweep in main().
static constexpr size_t MAX_ITERATIONS_COUNT = MAX_CODE_SIZE / CACHE_LINE_SIZE;
|
|
// Value passed as `count` to icache_test() (0x800000 == 8,388,608); main() also divides the
// measured counters and elapsed time by it to report per-repetition figures.
static constexpr size_t REPETITIONS = 0x800000L;
|
|
|
|
|
|
using namespace utils;
|
|
|
|
// CPUs the benchmark may run on: cleared at the start of main(), filled in by the
// --affinity option, and passed to sched_setaffinity() when it is not empty.
static cpu_set_t g_cpu_set;
|
|
|
|
/**
 * Prints the command-line help text to stdout.
 *
 * The help template uses the placeholder "ICACHE" wherever the program name belongs; every
 * occurrence is replaced by the name the program was invoked with.
 *
 * @param name executable name, normally argv[0]. If null, the placeholder is left as is.
 */
static void printUsage(char* name) {
    const std::string placeholder("ICACHE");
    const std::string exec_name(name ? name : placeholder.c_str());
    std::string usage(
            "ICACHE is a command-line tool for testing the L1 instruction cache performance.\n"
            "(Make sure security.perf_harden is set to 0)\n\n"
            "Usages:\n"
            "    ICACHE [options]\n"
            "\n"
            "Options:\n"
            "   --help, -h\n"
            "       print this message\n\n"
            "   --affinity=N, -a N\n"
            "       Specify which CPU the test should run on.\n\n"
    );

    // Resume searching *after* the text that was just inserted. Restarting at `pos` would
    // match the placeholder again whenever the executable name itself contains "ICACHE",
    // and the loop would never terminate.
    for (size_t pos = usage.find(placeholder); pos != std::string::npos;
            pos = usage.find(placeholder, pos + exec_name.length())) {
        usage.replace(pos, placeholder.length(), exec_name);
    }

    printf("%s", usage.c_str());
}
|
|
|
|
static int handleCommandLineArgments(int argc, char* argv[]) {
|
|
static constexpr const char* OPTSTR = "ha:";
|
|
static const struct option OPTIONS[] = {
|
|
{ "help", no_argument, 0, 'h' },
|
|
{ "affinity", required_argument, 0, 'a' },
|
|
{ 0, 0, 0, 0 } // termination of the option list
|
|
};
|
|
int opt;
|
|
int option_index = 0;
|
|
while ((opt = getopt_long(argc, argv, OPTSTR, OPTIONS, &option_index)) >= 0) {
|
|
std::string arg(optarg ? optarg : "");
|
|
switch (opt) {
|
|
default:
|
|
case 'h':
|
|
printUsage(argv[0]);
|
|
exit(0);
|
|
break;
|
|
case 'a':
|
|
size_t cpu = std::stoi(arg);
|
|
if (cpu < std::thread::hardware_concurrency()) {
|
|
CPU_SET(cpu, &g_cpu_set);
|
|
} else {
|
|
std::cerr << "N must be < " << std::thread::hardware_concurrency() << std::endl;
|
|
exit(0);
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
return optind;
|
|
}
|
|
|
|
int main(int argc, char* argv[]) {
|
|
CPU_ZERO(&g_cpu_set);
|
|
|
|
[[maybe_unused]] int option_index = handleCommandLineArgments(argc, argv);
|
|
[[maybe_unused]] int num_args = argc - option_index;
|
|
|
|
if (CPU_COUNT(&g_cpu_set)) {
|
|
sched_setaffinity(gettid(), sizeof(g_cpu_set), &g_cpu_set);
|
|
}
|
|
|
|
Profiler& profiler = Profiler::get();
|
|
profiler.resetEvents(Profiler::EV_CPU_CYCLES | Profiler::EV_L1I_RATES);
|
|
|
|
if (!profiler.isValid()) {
|
|
fprintf(stderr, "performance counters not enabled. try \"setprop security.perf_harden 0\"\n");
|
|
exit(0);
|
|
}
|
|
|
|
size_t const stepInBytes = 1024; // 1 KiB steps
|
|
size_t const step = stepInBytes / CACHE_LINE_SIZE;
|
|
|
|
std::cout << std::fixed << std::setprecision(2);
|
|
|
|
printf("[KiB]\t[cyc]\t[refs]\t[MPKI]\t[ns]\n");
|
|
|
|
Profiler::Counters counters;
|
|
|
|
for (size_t i=step ; i <= MAX_ITERATIONS_COUNT ; i += step) {
|
|
profiler.reset();
|
|
|
|
auto now = std::chrono::steady_clock::now();
|
|
profiler.start();
|
|
icache_test(REPETITIONS, i);
|
|
profiler.stop();
|
|
auto duration = std::chrono::steady_clock::now() - now;
|
|
|
|
profiler.readCounters(&counters);
|
|
|
|
std::cout << ((i*CACHE_LINE_SIZE)/1024) << "\t"
|
|
<< counters.getCpuCycles()/double(REPETITIONS) << "\t"
|
|
<< counters.getL1IReferences()/double(REPETITIONS) << "\t"
|
|
<< counters.getMPKI(counters.getL1IMisses()) << "\t"
|
|
<< duration.count()/double(REPETITIONS) << "\t"
|
|
<< std::endl;
|
|
}
|
|
|
|
return 0;
|
|
}
|