You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
225 lines
6.2 KiB
225 lines
6.2 KiB
#!/usr/bin/env python
|
|
# @lint-avoid-python-3-compatibility-imports
|
|
#
|
|
# ext4dist Summarize ext4 operation latency.
|
|
# For Linux, uses BCC, eBPF.
|
|
#
|
|
# USAGE: ext4dist [-h] [-T] [-m] [-p PID] [interval] [count]
|
|
#
|
|
# Copyright 2016 Netflix, Inc.
|
|
# Licensed under the Apache License, Version 2.0 (the "License")
|
|
#
|
|
# 12-Feb-2016 Brendan Gregg Created this.
|
|
|
|
from __future__ import print_function
|
|
from bcc import BPF
|
|
from time import sleep, strftime
|
|
import argparse
|
|
|
|
# symbols
|
|
kallsyms = "/proc/kallsyms"
|
|
|
|
# arguments
|
|
examples = """examples:
|
|
./ext4dist # show operation latency as a histogram
|
|
./ext4dist -p 181 # trace PID 181 only
|
|
./ext4dist 1 10 # print 1 second summaries, 10 times
|
|
./ext4dist -m 5 # 5s summaries, milliseconds
|
|
"""
|
|
parser = argparse.ArgumentParser(
|
|
description="Summarize ext4 operation latency",
|
|
formatter_class=argparse.RawDescriptionHelpFormatter,
|
|
epilog=examples)
|
|
parser.add_argument("-T", "--notimestamp", action="store_true",
|
|
help="don't include timestamp on interval output")
|
|
parser.add_argument("-m", "--milliseconds", action="store_true",
|
|
help="output in milliseconds")
|
|
parser.add_argument("-p", "--pid",
|
|
help="trace this PID only")
|
|
parser.add_argument("interval", nargs="?",
|
|
help="output interval, in seconds")
|
|
parser.add_argument("count", nargs="?", default=99999999,
|
|
help="number of outputs")
|
|
parser.add_argument("--ebpf", action="store_true",
|
|
help=argparse.SUPPRESS)
|
|
args = parser.parse_args()
|
|
pid = args.pid
|
|
countdown = int(args.count)
|
|
if args.milliseconds:
|
|
factor = 1000000
|
|
label = "msecs"
|
|
else:
|
|
factor = 1000
|
|
label = "usecs"
|
|
if args.interval and int(args.interval) == 0:
|
|
print("ERROR: interval 0. Exiting.")
|
|
exit()
|
|
debug = 0
|
|
|
|
# define BPF program
|
|
bpf_text = """
|
|
#include <uapi/linux/ptrace.h>
|
|
#include <linux/fs.h>
|
|
#include <linux/sched.h>
|
|
|
|
#define OP_NAME_LEN 8
|
|
typedef struct dist_key {
|
|
char op[OP_NAME_LEN];
|
|
u64 slot;
|
|
} dist_key_t;
|
|
BPF_HASH(start, u32);
|
|
BPF_HISTOGRAM(dist, dist_key_t);
|
|
|
|
// time operation
|
|
int trace_entry(struct pt_regs *ctx)
|
|
{
|
|
u32 pid = bpf_get_current_pid_tgid();
|
|
if (FILTER_PID)
|
|
return 0;
|
|
u64 ts = bpf_ktime_get_ns();
|
|
start.update(&pid, &ts);
|
|
return 0;
|
|
}
|
|
|
|
// The current ext4 (Linux 4.5) uses generic_file_read_iter(), instead of it's
|
|
// own function, for reads. So we need to trace that and then filter on ext4,
|
|
// which I do by checking file->f_op.
|
|
int trace_read_entry(struct pt_regs *ctx, struct kiocb *iocb)
|
|
{
|
|
u32 pid = bpf_get_current_pid_tgid();
|
|
if (FILTER_PID)
|
|
return 0;
|
|
|
|
// ext4 filter on file->f_op == ext4_file_operations
|
|
struct file *fp = iocb->ki_filp;
|
|
if ((u64)fp->f_op != EXT4_FILE_OPERATIONS)
|
|
return 0;
|
|
|
|
u64 ts = bpf_ktime_get_ns();
|
|
start.update(&pid, &ts);
|
|
return 0;
|
|
}
|
|
|
|
static int trace_return(struct pt_regs *ctx, const char *op)
|
|
{
|
|
u64 *tsp;
|
|
u32 pid = bpf_get_current_pid_tgid();
|
|
|
|
// fetch timestamp and calculate delta
|
|
tsp = start.lookup(&pid);
|
|
if (tsp == 0) {
|
|
return 0; // missed start or filtered
|
|
}
|
|
u64 delta = bpf_ktime_get_ns() - *tsp;
|
|
start.delete(&pid);
|
|
|
|
// Skip entries with backwards time: temp workaround for #728
|
|
if ((s64) delta < 0)
|
|
return 0;
|
|
|
|
delta /= FACTOR;
|
|
|
|
// store as histogram
|
|
dist_key_t key = {.slot = bpf_log2l(delta)};
|
|
__builtin_memcpy(&key.op, op, sizeof(key.op));
|
|
dist.increment(key);
|
|
|
|
return 0;
|
|
}
|
|
|
|
int trace_read_return(struct pt_regs *ctx)
|
|
{
|
|
char *op = "read";
|
|
return trace_return(ctx, op);
|
|
}
|
|
|
|
int trace_write_return(struct pt_regs *ctx)
|
|
{
|
|
char *op = "write";
|
|
return trace_return(ctx, op);
|
|
}
|
|
|
|
int trace_open_return(struct pt_regs *ctx)
|
|
{
|
|
char *op = "open";
|
|
return trace_return(ctx, op);
|
|
}
|
|
|
|
int trace_fsync_return(struct pt_regs *ctx)
|
|
{
|
|
char *op = "fsync";
|
|
return trace_return(ctx, op);
|
|
}
|
|
"""
|
|
|
|
# code replacements
|
|
with open(kallsyms) as syms:
|
|
ops = ''
|
|
for line in syms:
|
|
(addr, size, name) = line.rstrip().split(" ", 2)
|
|
name = name.split("\t")[0]
|
|
if name == "ext4_file_operations":
|
|
ops = "0x" + addr
|
|
break
|
|
if ops == '':
|
|
print("ERROR: no ext4_file_operations in /proc/kallsyms. Exiting.")
|
|
print("HINT: the kernel should be built with CONFIG_KALLSYMS_ALL.")
|
|
exit()
|
|
bpf_text = bpf_text.replace('EXT4_FILE_OPERATIONS', ops)
|
|
bpf_text = bpf_text.replace('FACTOR', str(factor))
|
|
if args.pid:
|
|
bpf_text = bpf_text.replace('FILTER_PID', 'pid != %s' % pid)
|
|
else:
|
|
bpf_text = bpf_text.replace('FILTER_PID', '0')
|
|
if debug or args.ebpf:
|
|
print(bpf_text)
|
|
if args.ebpf:
|
|
exit()
|
|
|
|
# load BPF program
|
|
b = BPF(text=bpf_text)
|
|
|
|
# Common file functions. See earlier comment about generic_file_read_iter().
|
|
# Comment by Joe Yin
|
|
# From Linux 4.10, the function .read_iter at the ext4_file_operations has
|
|
# changed to ext4_file_read_iter.
|
|
# So, I add get_kprobe_functions(b'ext4_file_read_iter'),it will first to attach ext4_file_read_iter,
|
|
# if fails and will attach the generic_file_read_iter which used to pre-4.10.
|
|
|
|
if BPF.get_kprobe_functions(b'ext4_file_read_iter'):
|
|
b.attach_kprobe(event="ext4_file_read_iter", fn_name="trace_entry")
|
|
else:
|
|
b.attach_kprobe(event="generic_file_read_iter", fn_name="trace_read_entry")
|
|
b.attach_kprobe(event="ext4_file_write_iter", fn_name="trace_entry")
|
|
b.attach_kprobe(event="ext4_file_open", fn_name="trace_entry")
|
|
b.attach_kprobe(event="ext4_sync_file", fn_name="trace_entry")
|
|
b.attach_kretprobe(event="generic_file_read_iter", fn_name="trace_read_return")
|
|
b.attach_kretprobe(event="ext4_file_write_iter", fn_name="trace_write_return")
|
|
b.attach_kretprobe(event="ext4_file_open", fn_name="trace_open_return")
|
|
b.attach_kretprobe(event="ext4_sync_file", fn_name="trace_fsync_return")
|
|
|
|
print("Tracing ext4 operation latency... Hit Ctrl-C to end.")
|
|
|
|
# output
|
|
exiting = 0
|
|
dist = b.get_table("dist")
|
|
while (1):
|
|
try:
|
|
if args.interval:
|
|
sleep(int(args.interval))
|
|
else:
|
|
sleep(99999999)
|
|
except KeyboardInterrupt:
|
|
exiting = 1
|
|
|
|
print()
|
|
if args.interval and (not args.notimestamp):
|
|
print(strftime("%H:%M:%S:"))
|
|
|
|
dist.print_log2_hist(label, "operation", section_print_fn=bytes.decode)
|
|
dist.clear()
|
|
|
|
countdown -= 1
|
|
if exiting or countdown == 0:
|
|
exit()
|