You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
256 lines
7.6 KiB
256 lines
7.6 KiB
#!/usr/bin/env python
|
|
# @lint-avoid-python-3-compatibility-imports
|
|
#
|
|
# runqlen Summarize scheduler run queue length as a histogram.
|
|
# For Linux, uses BCC, eBPF.
|
|
#
|
|
# This counts the length of the run queue, excluding the currently running
|
|
# thread, and shows it as a histogram.
|
|
#
|
|
# Also answers run queue occupancy.
|
|
#
|
|
# USAGE: runqlen [-h] [-T] [-O] [-C] [interval] [count]
|
|
#
|
|
# REQUIRES: Linux 4.9+ (BPF_PROG_TYPE_PERF_EVENT support). Under tools/old is
|
|
# a version of this tool that may work on Linux 4.6 - 4.8.
|
|
#
|
|
# Copyright 2016 Netflix, Inc.
|
|
# Licensed under the Apache License, Version 2.0 (the "License")
|
|
#
|
|
# 12-Dec-2016 Brendan Gregg Created this.
|
|
|
|
from __future__ import print_function
|
|
from bcc import BPF, PerfType, PerfSWConfig
|
|
from time import sleep, strftime
|
|
from tempfile import NamedTemporaryFile
|
|
from os import open, close, dup, unlink, O_WRONLY
|
|
import argparse
|
|
|
|
# arguments
#
# The epilog is printed verbatim by RawDescriptionHelpFormatter, so its
# layout (indentation and aligned comments) is part of the --help output.
examples = """examples:
    ./runqlen            # summarize run queue length as a histogram
    ./runqlen 1 10       # print 1 second summaries, 10 times
    ./runqlen -T 1       # 1s summaries and timestamps
    ./runqlen -O         # report run queue occupancy
    ./runqlen -C         # show each CPU separately
"""
parser = argparse.ArgumentParser(
    description="Summarize scheduler run queue length as a histogram",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)
parser.add_argument("-T", "--timestamp", action="store_true",
    help="include timestamp on output")
parser.add_argument("-O", "--runqocc", action="store_true",
    help="report run queue occupancy")
parser.add_argument("-C", "--cpus", action="store_true",
    help="print output for each CPU separately")
# type=int makes argparse reject non-numeric values with a usage message up
# front, instead of a ValueError traceback later on. It also means an
# explicit interval of 0 is the integer 0 (falsy) rather than the truthy
# string "0". The large defaults effectively mean "until interrupted".
parser.add_argument("interval", nargs="?", default=99999999, type=int,
    help="output interval, in seconds")
parser.add_argument("count", nargs="?", default=99999999, type=int,
    help="number of outputs")
# --ebpf is hidden from --help; when set, the generated BPF C text is printed.
parser.add_argument("--ebpf", action="store_true",
    help=argparse.SUPPRESS)
args = parser.parse_args()

# Number of summaries still to print.
countdown = int(args.count)
# Set to a non-zero value to print the generated BPF C text before loading it.
debug = 0
# Sampling frequency handed to the perf event (samples per second).
frequency = 99
|
|
|
|
# Linux 4.15 introduced a new field runnable_weight
|
|
# in linux_src:kernel/sched/sched.h as
|
|
# struct cfs_rq {
|
|
# struct load_weight load;
|
|
# unsigned long runnable_weight;
|
|
# unsigned int nr_running, h_nr_running;
|
|
# ......
|
|
# }
|
|
# and this tool needs to access nr_running to get
|
|
# runqueue len information.
|
|
#
|
|
# The commit which introduces cfs_rq->runnable_weight
|
|
# field also introduces the field sched_entity->runnable_weight
|
|
# where sched_entity is defined in linux_src:include/linux/sched.h.
|
|
#
|
|
# To cope with pre-4.15 and 4.15/post-4.15 releases,
|
|
# we run a simple BPF program to detect whether
|
|
# field sched_entity->runnable_weight exists. The existence of
|
|
# this field should imply the existence of cfs_rq->runnable_weight.
|
|
#
|
|
# This will need maintenance as the relationship between these
|
|
# two fields may change in the future.
|
|
#
|
|
def check_runnable_weight_field():
    """Report whether ``struct sched_entity`` has a ``runnable_weight`` member.

    A throwaway BPF program that reads ``entity->runnable_weight`` is handed
    to ``BPF()``; the member is considered present only if that succeeds.
    While the probe runs, file descriptor 2 is pointed at a temporary file so
    that compiler errors from a failed probe are not printed on the screen.
    Descriptor 2 is restored and the temporary file removed on every exit
    path.

    Returns:
        True if ``BPF()`` accepted the probe program, False if it raised an
        exception.
    """
    # Imported here because the module-level "from os import ..." list does
    # not include dup2.
    from os import dup2

    # Define the bpf program for checking purpose
    bpf_check_text = """
#include <linux/sched.h>
unsigned long dummy(struct sched_entity *entity)
{
    return entity->runnable_weight;
}
"""

    # Get a temporary file name to receive whatever is written to fd 2.
    tmp_file = NamedTemporaryFile(delete=False)
    tmp_file.close()

    try:
        # Keep a duplicate of the current stderr so it can be put back.
        old_stderr = dup(2)
        try:
            # NOTE: open() here is os.open (see the module imports), which
            # returns a raw file descriptor rather than a file object.
            fd = open(tmp_file.name, O_WRONLY)
            try:
                # dup2 targets fd 2 explicitly instead of relying on the
                # newly opened descriptor happening to be number 2.
                dup2(fd, 2)
            finally:
                close(fd)

            try:
                BPF(text=bpf_check_text)
                success_compile = True
            except Exception:
                # Any failure to build the probe counts as "field absent".
                # KeyboardInterrupt/SystemExit are deliberately not caught.
                success_compile = False
        finally:
            # Restore the original stderr even if something unexpected
            # propagates out of the probe.
            dup2(old_stderr, 2)
            close(old_stderr)
    finally:
        # remove the temporary file
        unlink(tmp_file.name)

    return success_compile
|
|
|
|
|
|
# define BPF program
#
# The C source below is a template. Three upper-case placeholder tokens in it
# are replaced with str.replace() before the program is compiled:
#   - one inside struct cfs_rq_partial, for the optional runnable_weight
#     member,
#   - one after the cpu_key_t typedef, for the histogram map declaration,
#   - one inside do_perf_event(), for the statement that records a sample.
# Do not mention those tokens anywhere else inside the string: every
# occurrence would be substituted.
bpf_text = """
#include <uapi/linux/ptrace.h>
#include <linux/sched.h>

// Declare enough of cfs_rq to find nr_running, since we can't #include the
// header. This will need maintenance. It is from kernel/sched/sched.h:
struct cfs_rq_partial {
    struct load_weight load;
    RUNNABLE_WEIGHT_FIELD
    unsigned int nr_running, h_nr_running;
};

typedef struct cpu_key {
    int cpu;
    unsigned int slot;
} cpu_key_t;
STORAGE

int do_perf_event()
{
    unsigned int len = 0;
    struct task_struct *task = NULL;
    struct cfs_rq_partial *my_q = NULL;

    // Fetch the run queue length from task->se.cfs_rq->nr_running. This is an
    // unstable interface and may need maintenance. Perhaps a future version
    // of BPF will support task_rq(p) or something similar as a more reliable
    // interface.
    task = (struct task_struct *)bpf_get_current_task();
    my_q = (struct cfs_rq_partial *)task->se.cfs_rq;
    len = my_q->nr_running;

    // Calculate run queue length by subtracting the currently running task,
    // if present. len 0 == idle, len 1 == one running task.
    if (len > 0)
        len--;

    STORE

    return 0;
}
"""
|
|
|
|
# code substitutions
#
# Choose the text for each placeholder in the BPF template first, then apply
# all of the replacements in one pass.
if args.cpus:
    # Per-CPU mode: the histogram is keyed by (cpu, slot).
    storage_decl = 'BPF_HISTOGRAM(dist, cpu_key_t);'
    store_stmt = ('cpu_key_t key = {.slot = len}; '
                  'key.cpu = bpf_get_smp_processor_id(); '
                  'dist.increment(key);')
else:
    # System-wide mode: the histogram is keyed by the queue length alone.
    storage_decl = 'BPF_HISTOGRAM(dist, unsigned int);'
    store_stmt = 'dist.increment(len);'

# Only declare runnable_weight in the partial struct when the probe says the
# running kernel has it; otherwise the placeholder is simply removed.
if check_runnable_weight_field():
    runnable_weight_decl = 'unsigned long runnable_weight;'
else:
    runnable_weight_decl = ''

for placeholder, replacement in (
        ('STORAGE', storage_decl),
        ('STORE', store_stmt),
        ('RUNNABLE_WEIGHT_FIELD', runnable_weight_decl)):
    bpf_text = bpf_text.replace(placeholder, replacement)

# Show the final program text on request; with --ebpf, stop after printing.
if args.ebpf or debug:
    print(bpf_text)
if args.ebpf:
    exit()
|
|
|
|
# initialize BPF & perf_events
#
# Load the generated program and hook do_perf_event() onto the software
# CPU-clock perf event, sampled at `frequency`.
b = BPF(text=bpf_text)
b.attach_perf_event(
    fn_name="do_perf_event",
    ev_type=PerfType.SOFTWARE,
    ev_config=PerfSWConfig.CPU_CLOCK,
    sample_freq=frequency,
    sample_period=0)

print("Sampling run queue length... Hit Ctrl-C to end.")
|
|
|
|
# output

def _occupancy(idle, queued):
    """Return queued / (idle + queued) as a float, or 0 when there are no samples."""
    samples = idle + queued
    if samples:
        return float(queued) / samples
    return 0


def _print_runqocc_per_cpu(dist):
    """Print the run queue occupancy percentage of every CPU.

    Keys of `dist` are expected to expose the cpu_key_t fields `cpu` and
    `slot`; values expose the sample count as `.value`. A sample with
    slot == 0 had nothing queued; any other slot counts as "queued".
    One line is printed for every CPU number from 0 up to the highest CPU
    seen, including CPUs that recorded no samples (reported as 0.00%).
    """
    entries = list(dist.items())

    cpumax = 0
    for k, _ in entries:
        if k.cpu > cpumax:
            cpumax = k.cpu

    idle = dict((c, 0) for c in range(cpumax + 1))
    queued = dict((c, 0) for c in range(cpumax + 1))
    for k, v in entries:
        if k.slot == 0:
            idle[k.cpu] += v.value
        else:
            queued[k.cpu] += v.value

    for c in range(cpumax + 1):
        runqocc = _occupancy(idle[c], queued[c])
        print("runqocc, CPU %-3d %6.2f%%" % (c, 100 * runqocc))


def _print_runqocc_system(dist):
    """Print the system-wide run queue occupancy percentage.

    Keys of `dist` expose the recorded queue length as `.value`; values
    expose the sample count as `.value`. Length 0 is counted as idle,
    anything else as queued.
    """
    idle = 0
    queued = 0
    for k, v in dist.items():
        if k.value == 0:
            idle += v.value
        else:
            queued += v.value
    print("runqocc: %0.2f%%" % (100 * _occupancy(idle, queued)))


# Convert once, up front: args.interval may be a string when given on the
# command line, and a string such as "0" is truthy, which would defeat the
# "interval 0 means print once and exit" check below.
interval = int(args.interval)
exiting = 0 if interval else 1
dist = b.get_table("dist")
while True:
    try:
        sleep(interval)
    except KeyboardInterrupt:
        # Ctrl-C ends the wait early; print one last summary, then exit.
        exiting = 1

    print()
    if args.timestamp:
        print("%-8s\n" % strftime("%H:%M:%S"), end="")

    if args.runqocc:
        if args.cpus:
            # run queue occupancy, per-CPU summary
            _print_runqocc_per_cpu(dist)
        else:
            # run queue occupancy, system-wide summary
            _print_runqocc_system(dist)
    else:
        # run queue length histograms
        dist.print_linear_hist("runqlen", "cpu")

    # Start the next interval from an empty histogram.
    dist.clear()

    countdown -= 1
    if exiting or countdown == 0:
        exit()
|