You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
128 lines
3.9 KiB
128 lines
3.9 KiB
#!/usr/bin/env python
|
|
import argparse
|
|
from time import sleep, strftime
|
|
from sys import argv
|
|
import ctypes as ct
|
|
from bcc import BPF, USDT
|
|
import inspect
|
|
import os
|
|
|
|
# Parse command line arguments
|
|
parser = argparse.ArgumentParser(description="Trace the moving average of the latency of an operation using usdt probes.",
|
|
formatter_class=argparse.RawDescriptionHelpFormatter)
|
|
parser.add_argument("-p", "--pid", type=int, help="The id of the process to trace.")
|
|
parser.add_argument("-i", "--interval", type=int, help="The interval in seconds on which to report the latency distribution.")
|
|
parser.add_argument("-c", "--count", type=int, default=16, help="The count of samples over which to calculate the moving average.")
|
|
parser.add_argument("-f", "--filterstr", type=str, default="", help="The prefix filter for the operation input. If specified, only operations for which the input string starts with the filterstr are traced.")
|
|
parser.add_argument("-v", "--verbose", dest="verbose", action="store_true", help="If true, will output verbose logging information.")
|
|
parser.set_defaults(verbose=False)
|
|
args = parser.parse_args()
|
|
this_pid = int(args.pid)
|
|
this_interval = int(args.interval)
|
|
this_count = int(args.count)
|
|
this_filter = str(args.filterstr)
|
|
|
|
if this_interval < 1:
|
|
print("Invalid value for interval, using 1.")
|
|
this_interval = 1
|
|
|
|
if this_count < 1:
|
|
print("Invalid value for count, using 1.")
|
|
this_count = 1
|
|
|
|
debugLevel=0
|
|
if args.verbose:
|
|
debugLevel=4
|
|
|
|
# BPF program
|
|
bpf_text_shared = "%s/bpf_text_shared.c" % os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
|
|
bpf_text = open(bpf_text_shared, 'r').read()
|
|
bpf_text += """
|
|
|
|
const u32 MAX_SAMPLES = SAMPLE_COUNT;
|
|
|
|
struct hash_key_t
|
|
{
|
|
char input[64];
|
|
};
|
|
|
|
struct hash_leaf_t
|
|
{
|
|
u32 count;
|
|
u64 total;
|
|
u64 average;
|
|
};
|
|
|
|
/**
|
|
* @brief Contains the averages for the operation latencies by operation input.
|
|
*/
|
|
BPF_HASH(lat_hash, struct hash_key_t, struct hash_leaf_t, 512);
|
|
|
|
/**
|
|
* @brief Reads the operation response arguments, calculates the latency, and stores it in the histogram.
|
|
* @param ctx The BPF context.
|
|
*/
|
|
int trace_operation_end(struct pt_regs* ctx)
|
|
{
|
|
u64 operation_id;
|
|
bpf_usdt_readarg(1, ctx, &operation_id);
|
|
|
|
struct start_data_t* start_data = start_hash.lookup(&operation_id);
|
|
if (0 == start_data) {
|
|
return 0;
|
|
}
|
|
|
|
u64 duration = bpf_ktime_get_ns() - start_data->start;
|
|
struct hash_key_t hash_key = {};
|
|
__builtin_memcpy(&hash_key.input, start_data->input, sizeof(hash_key.input));
|
|
start_hash.delete(&operation_id);
|
|
|
|
struct hash_leaf_t zero = {};
|
|
struct hash_leaf_t* hash_leaf = lat_hash.lookup_or_init(&hash_key, &zero);
|
|
if (0 == hash_leaf) {
|
|
return 0;
|
|
}
|
|
|
|
if (hash_leaf->count < MAX_SAMPLES) {
|
|
hash_leaf->count++;
|
|
} else {
|
|
hash_leaf->total -= hash_leaf->average;
|
|
}
|
|
|
|
hash_leaf->total += duration;
|
|
hash_leaf->average = hash_leaf->total / hash_leaf->count;
|
|
|
|
return 0;
|
|
}
|
|
"""
|
|
|
|
bpf_text = bpf_text.replace("SAMPLE_COUNT", str(this_count))
|
|
bpf_text = bpf_text.replace("FILTER_STRING", this_filter)
|
|
if this_filter:
|
|
bpf_text = bpf_text.replace("FILTER", "if (!filter(start_data.input)) { return 0; }")
|
|
else:
|
|
bpf_text = bpf_text.replace("FILTER", "")
|
|
|
|
# Create USDT context
|
|
print("Attaching probes to pid %d" % this_pid)
|
|
usdt_ctx = USDT(pid=this_pid)
|
|
usdt_ctx.enable_probe(probe="operation_start", fn_name="trace_operation_start")
|
|
usdt_ctx.enable_probe(probe="operation_end", fn_name="trace_operation_end")
|
|
|
|
# Create BPF context, load BPF program
|
|
bpf_ctx = BPF(text=bpf_text, usdt_contexts=[usdt_ctx], debug=debugLevel)
|
|
|
|
print("Tracing... Hit Ctrl-C to end.")
|
|
|
|
lat_hash = bpf_ctx.get_table("lat_hash")
|
|
while (1):
|
|
try:
|
|
sleep(this_interval)
|
|
except KeyboardInterrupt:
|
|
exit()
|
|
|
|
print("[%s]" % strftime("%H:%M:%S"))
|
|
print("%-64s %8s %16s" % ("input", "count", "latency (us)"))
|
|
for k, v in lat_hash.items():
|
|
print("%-64s %8d %16d" % (k.input, v.count, v.average / 1000))
|