You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1487 lines
36 KiB

#! /usr/bin/perl
#
# Copyright © 2017 Intel Corporation
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
#
use strict;
use warnings;
use 5.010;
my $gid = 0;
my (%db, %vdb, %queue, %submit, %notify, %rings, %ctxdb, %ringmap, %reqwait,
%ctxtimelines);
my (%cids, %ctxmap);
my $cid = 0;
my %queues;
my @freqs;
use constant VENG => '255:254';
my $max_requests = 1000;
my $width_us = 32000;
my $correct_durations = 0;
my %ignore_ring;
my %skip_box;
my $html = 0;
my $trace = 0;
my $avg_delay_stats = 0;
my $gpu_timeline = 0;
my $colour_contexts = 0;
my @args;
sub arg_help
{
return unless scalar(@_);
if ($_[0] eq '--help' or $_[0] eq '-h') {
shift @_;
print <<ENDHELP;
Notes:
The tool parse the output generated by the 'perf script' command after the
correct set of i915 tracepoints have been collected via perf record.
To collect the data:
./trace.pl --trace [command-to-be-profiled]
The above will invoke perf record, or alternatively it can be done directly:
perf record -a -c 1 -e i915:intel_gpu_freq_change, \
i915:i915_request_add, \
i915:i915_request_submit, \
i915:i915_request_in, \
i915:i915_request_out, \
dma_fence:dma_fence_signaled, \
i915:i915_request_wait_begin, \
i915:i915_request_wait_end \
[command-to-be-profiled]
Then create the log file with:
perf script >trace.log
This file in turn should be piped into this tool which will generate some
statistics out of it, or if --html was given HTML output.
HTML can be viewed from a directory containing the 'vis' JavaScript module.
On Ubuntu this can be installed like this:
apt-get install npm
npm install vis
Usage:
trace.pl <options> <input-file >output-file
--help / -h This help text
--max-items=num / -m num Maximum number of boxes to put on the
timeline. More boxes means more work for
the JavaScript engine in the browser.
--zoom-width-ms=ms / -z ms Width of the initial timeline zoom
--split-requests / -s Try to split out request which were
submitted together due coalescing in the
driver. May not be 100% accurate and may
influence the per-engine statistics so
use with care.
--ignore-ring=id / -i id Ignore ring with the numerical id when
parsing the log (enum intel_engine_id).
Can be given multiple times.
--skip-box=name / -x name Do not put a certain type of a box on
the timeline. One of: queue, ready,
execute and ctxsave.
Can be given multiple times.
--html Generate HTML output.
--trace cmd Trace the following command.
--avg-delay-stats Print average delay stats.
--gpu-timeline Draw overall GPU busy timeline.
--colour-contexts / -c Use different colours for different
context execution boxes.
ENDHELP
exit 0;
}
return @_;
}
sub arg_html
{
return unless scalar(@_);
if ($_[0] eq '--html') {
shift @_;
$html = 1;
}
return @_;
}
sub arg_avg_delay_stats
{
return unless scalar(@_);
if ($_[0] eq '--avg-delay-stats') {
shift @_;
$avg_delay_stats = 1;
}
return @_;
}
sub arg_gpu_timeline
{
return unless scalar(@_);
if ($_[0] eq '--gpu-timeline') {
shift @_;
$gpu_timeline = 1;
}
return @_;
}
sub arg_trace
{
my @events = ( 'i915:intel_gpu_freq_change',
'i915:i915_request_add',
'i915:i915_request_submit',
'i915:i915_request_in',
'i915:i915_request_out',
'dma_fence:dma_fence_signaled',
'i915:i915_request_wait_begin',
'i915:i915_request_wait_end' );
return unless scalar(@_);
if ($_[0] eq '--trace') {
shift @_;
unshift @_, '--';
unshift @_, join(',', @events);
unshift @_, ('perf', 'record', '-a', '-c', '1', '-q', '-o', 'perf.data', '-e');
exec @_;
}
return @_;
}
sub arg_max_requests
{
my $val;
return unless scalar(@_);
if ($_[0] eq '--max-requests' or $_[0] eq '-m') {
shift @_;
$val = shift @_;
} elsif ($_[0] =~ /--max-requests=(\d+)/) {
shift @_;
$val = $1;
}
$max_requests = int($val) if defined $val;
return @_;
}
sub arg_zoom_width
{
my $val;
return unless scalar(@_);
if ($_[0] eq '--zoom-width-ms' or $_[0] eq '-z') {
shift @_;
$val = shift @_;
} elsif ($_[0] =~ /--zoom-width-ms=(\d+)/) {
shift @_;
$val = $1;
}
$width_us = int($val) * 1000 if defined $val;
return @_;
}
sub arg_split_requests
{
return unless scalar(@_);
if ($_[0] eq '--split-requests' or $_[0] eq '-s') {
shift @_;
$correct_durations = 1;
}
return @_;
}
sub arg_ignore_ring
{
my $val;
return unless scalar(@_);
if ($_[0] eq '--ignore-ring' or $_[0] eq '-i') {
shift @_;
$val = shift @_;
} elsif ($_[0] =~ /--ignore-ring=(\d+)/) {
shift @_;
$val = $1;
}
$ignore_ring{$val} = 1 if defined $val;
return @_;
}
sub arg_skip_box
{
my $val;
return unless scalar(@_);
if ($_[0] eq '--skip-box' or $_[0] eq '-x') {
shift @_;
$val = shift @_;
} elsif ($_[0] =~ /--skip-box=(\d+)/) {
shift @_;
$val = $1;
}
$skip_box{$val} = 1 if defined $val;
return @_;
}
sub arg_colour_contexts
{
return unless scalar(@_);
if ($_[0] eq '--colour-contexts' or
$_[0] eq '--color-contexts' or
$_[0] eq '-c') {
shift @_;
$colour_contexts = 1;
}
return @_;
}
@args = @ARGV;
while (@args) {
my $left = scalar(@args);
@args = arg_help(@args);
@args = arg_html(@args);
@args = arg_avg_delay_stats(@args);
@args = arg_gpu_timeline(@args);
@args = arg_trace(@args);
@args = arg_max_requests(@args);
@args = arg_zoom_width(@args);
@args = arg_split_requests(@args);
@args = arg_ignore_ring(@args);
@args = arg_skip_box(@args);
@args = arg_colour_contexts(@args);
last if $left == scalar(@args);
}
die if scalar(@args);
@ARGV = @args;
sub db_key
{
my ($ring, $ctx, $seqno) = @_;
return $ring . '/' . $ctx . '/' . $seqno;
}
sub notify_key
{
my ($ctx, $seqno) = @_;
return $ctx . '/' . $seqno;
}
sub sanitize_ctx
{
my ($ctx, $ring) = @_;
if (exists $ctxdb{$ctx} and $ctxdb{$ctx} > 1) {
return $ctx . '.' . $ctxdb{$ctx};
} else {
return $ctx;
}
}
sub is_veng
{
my ($class, $instance) = split ':', shift;
return $instance eq '254';
}
# Main input loop - parse lines and build the internal representation of the
# trace using a hash of requests and some auxilliary data structures.
my $prev_freq = 0;
my $prev_freq_ts = 0;
while (<>) {
my @fields;
my $tp_name;
my %tp;
my ($time, $ctx, $ring, $seqno, $orig_ctx, $key);
chomp;
@fields = split ' ';
chop $fields[3];
$time = int($fields[3] * 1000000.0 + 0.5);
$tp_name = $fields[4];
splice @fields, 0, 5;
foreach my $f (@fields) {
my ($k, $v);
next unless $f =~ m/=/;
($k, $v) = ($`, $');
$k = 'global' if $k eq 'global_seqno';
chop $v if substr($v, -1, 1) eq ',';
$tp{$k} = $v;
$tp{'ring'} = $tp{'engine'} if $k eq 'engine';
}
next if exists $tp{'ring'} and exists $ignore_ring{$tp{'ring'}};
if (exists $tp{'ring'} and exists $tp{'seqno'}) {
$ring = $tp{'ring'};
$seqno = $tp{'seqno'};
if (exists $tp{'ctx'}) {
$ctx = $tp{'ctx'};
$orig_ctx = $ctx;
$ctx = sanitize_ctx($ctx, $ring);
$ring = VENG if is_veng($ring);
$key = db_key($ring, $ctx, $seqno);
}
}
if ($tp_name eq 'i915:i915_request_wait_begin:') {
my %rw;
next if exists $reqwait{$key};
die if $ring eq VENG and not exists $queues{$ctx};
$rw{'key'} = $key;
$rw{'ring'} = $ring;
$rw{'seqno'} = $seqno;
$rw{'ctx'} = $ctx;
$rw{'start'} = $time;
$reqwait{$key} = \%rw;
} elsif ($tp_name eq 'i915:i915_request_wait_end:') {
die if $ring eq VENG and not exists $queues{$ctx};
if (exists $reqwait{$key}) {
$reqwait{$key}->{'end'} = $time;
} else { # Virtual engine
my $vkey = db_key(VENG, $ctx, $seqno);
die unless exists $reqwait{$vkey};
# If the wait started on the virtual engine, attribute
# it to it completely.
$reqwait{$vkey}->{'end'} = $time;
}
} elsif ($tp_name eq 'i915:i915_request_add:') {
if (exists $queue{$key}) {
$ctxdb{$orig_ctx}++;
$ctx = sanitize_ctx($orig_ctx, $ring);
$key = db_key($ring, $ctx, $seqno);
} else {
$ctxdb{$orig_ctx} = 1;
}
$queue{$key} = $time;
if ($ring eq VENG and not exists $queues{$ctx}) {
$queues{$ctx} = 1 ;
$cids{$ctx} = $cid++;
$ctxmap{$cids{$ctx}} = $ctx;
}
} elsif ($tp_name eq 'i915:i915_request_submit:') {
die if exists $submit{$key};
die unless exists $queue{$key};
die if $ring eq VENG and not exists $queues{$ctx};
$submit{$key} = $time;
} elsif ($tp_name eq 'i915:i915_request_in:') {
my ($q, $s);
my %req;
# preemption
delete $db{$key} if exists $db{$key};
unless (exists $queue{$key}) {
# Virtual engine
my $vkey = db_key(VENG, $ctx, $seqno);
my %req;
die unless exists $queues{$ctx};
die unless exists $queue{$vkey};
die unless exists $submit{$vkey};
# Create separate request record on the queue timeline
$q = $queue{$vkey};
$s = $submit{$vkey};
$req{'queue'} = $q;
$req{'submit'} = $s;
$req{'start'} = $time;
$req{'end'} = $time;
$req{'ring'} = VENG;
$req{'seqno'} = $seqno;
$req{'ctx'} = $ctx;
$req{'name'} = $ctx . '/' . $seqno;
$req{'global'} = $tp{'global'};
$req{'port'} = $tp{'port'};
$vdb{$vkey} = \%req;
} else {
$q = $queue{$key};
$s = $submit{$key};
}
$req{'start'} = $time;
$req{'ring'} = $ring;
$req{'seqno'} = $seqno;
$req{'ctx'} = $ctx;
$ctxtimelines{$ctx . '/' . $ring} = 1;
$req{'name'} = $ctx . '/' . $seqno;
$req{'global'} = $tp{'global'};
$req{'port'} = $tp{'port'};
$req{'queue'} = $q;
$req{'submit'} = $s;
$req{'virtual'} = 1 if exists $queues{$ctx};
$rings{$ring} = $gid++ unless exists $rings{$ring};
$ringmap{$rings{$ring}} = $ring;
$db{$key} = \%req;
} elsif ($tp_name eq 'i915:i915_request_out:') {
if ($tp{'completed?'}) {
my $nkey;
die unless exists $db{$key};
die unless exists $db{$key}->{'start'};
die if exists $db{$key}->{'end'};
$nkey = notify_key($ctx, $seqno);
$db{$key}->{'end'} = $time;
$db{$key}->{'notify'} = $notify{$nkey}
if exists $notify{$nkey};
} else {
delete $db{$key};
}
} elsif ($tp_name eq 'dma_fence:dma_fence_signaled:') {
my $nkey;
next unless $tp{'driver'} eq 'i915' and
$tp{'timeline'} eq 'signaled';
$nkey = notify_key($tp{'context'}, $tp{'seqno'});
die if exists $notify{$nkey};
$notify{$nkey} = $time unless exists $notify{$nkey};
} elsif ($tp_name eq 'i915:intel_gpu_freq_change:') {
push @freqs, [$prev_freq_ts, $time, $prev_freq] if $prev_freq;
$prev_freq_ts = $time;
$prev_freq = $tp{'new_freq'};
}
}
# Sanitation pass to fixup up out of order notify and context complete, and to
# find the largest seqno to be used for timeline sorting purposes.
my $max_seqno = 0;
foreach my $key (keys %db) {
my $nkey = notify_key($db{$key}->{'ctx'}, $db{$key}->{'seqno'});
die unless exists $db{$key}->{'start'};
$max_seqno = $db{$key}->{'seqno'} if $db{$key}->{'seqno'} > $max_seqno;
# Notify arrived after context complete?
$db{$key}->{'notify'} = $notify{$nkey} if not exists $db{$key}->{'notify'}
and exists $notify{$nkey};
# No notify but we have end?
$db{$key}->{'notify'} = $db{$key}->{'end'} if exists $db{$key}->{'end'} and
not exists $db{$key}->{'notify'};
# If user interrupt arrived out of order push it back to be no later
# than request out.
if (exists $db{$key}->{'end'} and exists $db{$key}->{'notify'} and
$db{$key}->{'notify'} > $db{$key}->{'end'}) {
$db{$key}->{'notify'} = $db{$key}->{'end'};
}
}
my $key_count = scalar(keys %db);
my %engine_timelines;
sub sortStart {
my $as = $db{$a}->{'start'};
my $bs = $db{$b}->{'start'};
my $val;
$val = $as <=> $bs;
$val = $a cmp $b if $val == 0;
return $val;
}
sub get_engine_timeline {
my ($ring) = @_;
my @timeline;
return $engine_timelines{$ring} if exists $engine_timelines{$ring};
@timeline = grep { $db{$_}->{'ring'} eq $ring } keys %db;
@timeline = sort sortStart @timeline;
$engine_timelines{$ring} = \@timeline;
return \@timeline;
}
# Fix up coalesced requests by ending them either when the following same
# context request with known end ends, or when a different context starts.
foreach my $gid (sort keys %rings) {
my $ring = $ringmap{$rings{$gid}};
my $timeline = get_engine_timeline($ring);
my $last_complete = -1;
my $last_ctx = -1;
my $complete;
foreach my $pos (0..$#{$timeline}) {
my $key = @{$timeline}[$pos];
my ($ctx, $end);
next if exists $db{$key}->{'end'};
$db{$key}->{'no-end'} = 1;
$ctx = $db{$key}->{'ctx'};
if ($pos > $last_complete or $ctx != $last_ctx) {
my $next = $pos;
undef $complete;
while ($next < $#{$timeline}) {
my $next_key = ${$timeline}[++$next];
if ($ctx == $db{$next_key}->{'ctx'} and
exists $db{$next_key}->{'end'}) {
$last_ctx = $db{$next_key}->{'ctx'};
$last_complete = $next;
$complete = $next_key;
last;
}
}
}
if (defined $complete) {
if ($ctx == $db{$complete}->{'ctx'}) {
$end = $db{$complete}->{'end'};
} else {
$end = $db{$complete}->{'start'};
}
} else {
# No next submission. Use notify if available or give up.
if (exists $db{$key}->{'notify'}) {
$end = $db{$key}->{'notify'};
} else {
$end = $db{$key}->{'start'};
$db{$key}->{'incomplete'} = 1;
}
}
unless (exists $db{$key}->{'notify'}) {
$db{$key}->{'notify'} = $end;
$db{$key}->{'no-notify'} = 1;
}
$db{$key}->{'end'} = $end;
$db{$key}->{'notify'} = $end if $db{$key}->{'notify'} > $end;
}
}
my $re_sort = 1;
my @sorted_keys;
sub maybe_sort_keys
{
if ($re_sort) {
@sorted_keys = sort sortStart keys %db;
$re_sort = 0;
die "Database changed size?!" unless scalar(@sorted_keys) ==
$key_count;
}
}
maybe_sort_keys();
my %ctx_timelines;
sub sortContext {
my $as = $db{$a}->{'seqno'};
my $bs = $db{$b}->{'seqno'};
my $val;
$val = $as <=> $bs;
die if $val == 0;
return $val;
}
sub get_ctx_timeline {
my ($ctx, $ring, $key) = @_;
my @timeline;
return $ctx_timelines{$key} if exists $ctx_timelines{$key};
@timeline = grep { $db{$_}->{'ring'} eq $ring and
$db{$_}->{'ctx'} == $ctx } @sorted_keys;
# FIXME seqno restart
@timeline = sort sortContext @timeline;
$ctx_timelines{$key} = \@timeline;
return \@timeline;
}
# Split out merged batches if requested.
if ($correct_durations) {
# Shift !port0 requests start time to after the previous context on the
# same timeline has finished.
foreach my $gid (sort keys %rings) {
my $ring = $ringmap{$rings{$gid}};
my $timeline = get_engine_timeline($ring);
my $complete;
foreach my $pos (0..$#{$timeline}) {
my $key = @{$timeline}[$pos];
my $prev = $complete;
my $pkey;
$complete = $key unless exists $db{$key}->{'no-end'};
$pkey = $complete;
next if $db{$key}->{'port'} == 0;
$pkey = $prev if $complete eq $key;
die unless defined $pkey;
$db{$key}->{'start'} = $db{$pkey}->{'end'};
$db{$key}->{'start'} = $db{$pkey}->{'notify'} if $db{$key}->{'start'} > $db{$key}->{'end'};
die if $db{$key}->{'start'} > $db{$key}->{'end'};
$re_sort = 1;
}
}
maybe_sort_keys();
# Batch with no-end (no request_out) means it was submitted as part of
# coalesced context. This means it's start time should be set to the end
# time of a following request on this context timeline.
foreach my $tkey (sort keys %ctxtimelines) {
my ($ctx, $ring) = split '/', $tkey;
my $timeline = get_ctx_timeline($ctx, $ring, $tkey);
my $last_complete = -1;
my $complete;
foreach my $pos (0..$#{$timeline}) {
my $key = @{$timeline}[$pos];
my $next_key;
next unless exists $db{$key}->{'no-end'};
last if $pos == $#{$timeline};
# Shift following request to start after the current
# one, but only if that wouldn't make it zero duration,
# which would indicate notify arrived after context
# complete.
$next_key = ${$timeline}[$pos + 1];
if (exists $db{$key}->{'notify'} and
$db{$key}->{'notify'} < $db{$key}->{'end'}) {
$db{$next_key}->{'engine-start'} = $db{$next_key}->{'start'};
$db{$next_key}->{'start'} = $db{$key}->{'notify'};
$re_sort = 1;
}
}
}
}
maybe_sort_keys();
# GPU time accounting
my (%running, %runnable, %queued, %batch_avg, %batch_total_avg, %batch_count);
my (%submit_avg, %execute_avg, %ctxsave_avg);
my $last_ts = 0;
my $first_ts;
my $min_ctx;
foreach my $key (@sorted_keys) {
my $ring = $db{$key}->{'ring'};
my $end = $db{$key}->{'end'};
my $start = $db{$key}->{'start'};
my $engine_start = $db{$key}->{'engine_start'};
my $notify = $db{$key}->{'notify'};
$first_ts = $db{$key}->{'queue'} if not defined $first_ts or $db{$key}->{'queue'} < $first_ts;
$last_ts = $end if $end > $last_ts;
$min_ctx = $db{$key}->{'ctx'} if not defined $min_ctx or
$db{$key}->{'ctx'} < $min_ctx;
unless (exists $db{$key}->{'no-end'}) {
$db{$key}->{'context-complete-delay'} = $end - $notify;
} else {
$db{$key}->{'context-complete-delay'} = 0;
}
$engine_start = $db{$key}->{'start'} unless defined $engine_start;
$db{$key}->{'execute-delay'} = $engine_start - $db{$key}->{'submit'};
$db{$key}->{'submit-delay'} = $db{$key}->{'submit'} - $db{$key}->{'queue'};
unless (exists $db{$key}->{'no-notify'}) {
$db{$key}->{'duration'} = $notify - $start;
} else {
$db{$key}->{'duration'} = 0;
}
$running{$ring} += $end - $start if $correct_durations or
not exists $db{$key}->{'no-end'};
unless (exists $db{$key}->{'virtual'}) {
$runnable{$ring} += $db{$key}->{'execute-delay'};
$queued{$ring} += $start - $db{$key}->{'execute-delay'} - $db{$key}->{'queue'};
}
$batch_count{$ring}++;
$batch_avg{$ring} += $db{$key}->{'duration'};
$batch_total_avg{$ring} += $end - $start;
$submit_avg{$ring} += $db{$key}->{'submit-delay'};
$execute_avg{$ring} += $db{$key}->{'execute-delay'};
$ctxsave_avg{$ring} += $db{$key}->{'context-complete-delay'};
}
foreach my $ring (sort keys %batch_avg) {
$batch_avg{$ring} /= $batch_count{$ring};
$batch_total_avg{$ring} /= $batch_count{$ring};
$submit_avg{$ring} /= $batch_count{$ring};
$execute_avg{$ring} /= $batch_count{$ring};
$ctxsave_avg{$ring} /= $batch_count{$ring};
}
# Calculate engine idle time
my %flat_busy;
foreach my $gid (sort keys %rings) {
my $ring = $ringmap{$rings{$gid}};
my (@s_, @e_);
# Extract all GPU busy intervals and sort them.
foreach my $key (@sorted_keys) {
next unless $db{$key}->{'ring'} eq $ring;
die if $db{$key}->{'start'} > $db{$key}->{'end'};
push @s_, $db{$key}->{'start'};
push @e_, $db{$key}->{'end'};
}
die unless $#s_ == $#e_;
# Flatten the intervals.
for my $i (1..$#s_) {
last if $i >= @s_; # End of array.
die if $e_[$i] < $s_[$i];
if ($s_[$i] <= $e_[$i - 1]) {
# Current entry overlaps with the previous one. We need
# to merge end of the previous interval from the list
# with the start of the current one.
if ($e_[$i] >= $e_[$i - 1]) {
splice @e_, $i - 1, 1;
} else {
splice @e_, $i, 1;
}
splice @s_, $i, 1;
# Continue with the same element when list got squashed.
redo;
}
}
# Add up all busy times.
my $total = 0;
for my $i (0..$#s_) {
die if $e_[$i] < $s_[$i];
$total = $total + ($e_[$i] - $s_[$i]);
}
$flat_busy{$ring} = $total;
}
# Calculate overall GPU idle time
my @gpu_intervals;
my (@s_, @e_);
# Extract all GPU busy intervals and sort them.
foreach my $key (@sorted_keys) {
push @s_, $db{$key}->{'start'};
push @e_, $db{$key}->{'end'};
die if $db{$key}->{'start'} > $db{$key}->{'end'};
}
die unless $#s_ == $#e_;
# Flatten the intervals (copy & paste of the flattening loop above)
for my $i (1..$#s_) {
last if $i >= @s_;
die if $e_[$i] < $s_[$i];
die if $s_[$i] < $s_[$i - 1];
if ($s_[$i] <= $e_[$i - 1]) {
if ($e_[$i] >= $e_[$i - 1]) {
splice @e_, $i - 1, 1;
} else {
splice @e_, $i, 1;
}
splice @s_, $i, 1;
redo;
}
}
# Add up all busy times.
my $total = 0;
for my $i (0..$#s_) {
die if $e_[$i] < $s_[$i];
$total = $total + ($e_[$i] - $s_[$i]);
}
# Generate data for the GPU timeline if requested
if ($gpu_timeline) {
for my $i (0..$#s_) {
push @gpu_intervals, [ $s_[$i], $e_[$i] ];
}
}
$flat_busy{'gpu-busy'} = $total / ($last_ts - $first_ts) * 100.0;
$flat_busy{'gpu-idle'} = (1.0 - $total / ($last_ts - $first_ts)) * 100.0;
# Add up all request waits per engine
my %reqw;
foreach my $key (keys %reqwait) {
$reqw{$reqwait{$key}->{'ring'}} += $reqwait{$key}->{'end'} - $reqwait{$key}->{'start'};
}
# Add up all request waits per virtual engine
my %vreqw;
foreach my $key (keys %reqwait) {
$vreqw{$reqwait{$key}->{'ctx'}} += $reqwait{$key}->{'end'} - $reqwait{$key}->{'start'};
}
say sprintf('GPU: %.2f%% idle, %.2f%% busy',
$flat_busy{'gpu-idle'}, $flat_busy{'gpu-busy'}) unless $html;
my $timeline_text = $colour_contexts ?
'per context coloured shading like' : 'box shading like';
my %ctx_colours;
my $ctx_table;
sub generate_ctx_table
{
my @states = ('queue', 'ready', 'execute', 'ctxsave', 'incomplete');
my $max_show = 6;
my (@ctxts, @disp_ctxts);
my $step;
if( $colour_contexts ) {
@ctxts = sort keys %ctxdb;
} else {
@ctxts = ($min_ctx);
}
# Limit number of shown context examples
$step = int(scalar(@ctxts) / $max_show);
if ($step) {
foreach my $i (0..$#ctxts) {
push @disp_ctxts, $ctxts[$i] unless $i % $step;
last if scalar(@disp_ctxts) == $max_show;
}
} else {
@disp_ctxts = @ctxts;
}
$ctx_table .= '<table>';
foreach my $ctx (@disp_ctxts) {
$ctx_table .= "<tr>\n";
$ctx_table .= " <td>Context $ctx</td>\n" if $colour_contexts;
foreach my $state (@states) {
$ctx_table .= " <td align='center' valign='middle'><div style='" . box_style($ctx, $state) . " padding-top: 6px; padding-bottom: 6px; padding-left: 6x; padding-right: 6px;'>" . uc($state) . "</div></td>\n";
}
$ctx_table .= "</tr>\n";
}
$ctx_table .= '</table>';
}
sub generate_ctx_colours
{
my $num_ctx = keys %ctxdb;
my $i = 0;
foreach my $ctx (sort keys %ctxdb) {
$ctx_colours{$ctx} = int(360 / $num_ctx * $i++);
}
}
generate_ctx_colours() if $html and $colour_contexts;
generate_ctx_table() if $html;
print <<ENDHTML if $html;
<!DOCTYPE HTML>
<html>
<head>
<title>i915 GT timeline</title>
<style type="text/css">
body, html {
font-family: sans-serif;
}
</style>
<script src="node_modules/vis/dist/vis.js"></script>
<link href="node_modules/vis//dist/vis.css" rel="stylesheet" type="text/css" />
</head>
<body>
<p>
<b>Timeline request view is $timeline_text:</b>
<table>
<tr>
<td>
$ctx_table
</td>
<td>
QUEUE = requests executing on the GPU<br>
READY = runnable requests waiting for a slot on GPU<br>
EXECUTE = requests waiting on fences and dependencies before they are runnable<br>
CTXSAVE = GPU saving the context image<br>
INCOMPLETE = request of unknown completion time
<p>
Boxes are in format 'ctx-id/seqno'.
</p>
<p>
Use Ctrl+scroll-action to zoom-in/out and scroll-action or dragging to move around the timeline.
</p>
<button onclick="toggleStacking()">Toggle overlap stacking</button>
</td>
</tr>
</table>
<p>
<b>GPU idle: $flat_busy{'gpu-idle'}%</b>
<br>
<b>GPU busy: $flat_busy{'gpu-busy'}%</b>
</p>
<div id="visualization"></div>
<script type="text/javascript">
var container = document.getElementById('visualization');
var groups = new vis.DataSet([
ENDHTML
# var groups = new vis.DataSet([
# {id: 1, content: 'g0'},
# {id: 2, content: 'g1'}
# ]);
sub html_stats
{
my ($stats, $group, $id) = @_;
my $veng = exists $stats->{'virtual'} ? 1 : 0;
my $name;
$name = $veng ? 'Virtual' : 'Ring';
$name .= $group;
$name .= '<br><small><br>';
unless ($veng) {
$name .= sprintf('%.2f', $stats->{'idle'}) . '% idle<br><br>';
$name .= sprintf('%.2f', $stats->{'busy'}) . '% busy<br>';
}
$name .= sprintf('%.2f', $stats->{'runnable'}) . '% runnable<br>';
$name .= sprintf('%.2f', $stats->{'queued'}) . '% queued<br><br>';
$name .= sprintf('%.2f', $stats->{'wait'}) . '% wait<br><br>';
$name .= $stats->{'count'} . ' batches<br>';
unless ($veng) {
$name .= sprintf('%.2f', $stats->{'avg'}) . 'us avg batch<br>';
$name .= sprintf('%.2f', $stats->{'total-avg'}) . 'us avg engine batch<br>';
}
$name .= '</small>';
print "\t{id: $id, content: '$name'},\n";
}
sub stdio_stats
{
my ($stats, $group, $id) = @_;
my $veng = exists $stats->{'virtual'} ? 1 : 0;
my $str;
$str = $veng ? 'Virtual' : 'Ring';
$str .= $group . ': ';
$str .= $stats->{'count'} . ' batches, ';
unless ($veng) {
$str .= sprintf('%.2f (%.2f) avg batch us, ',
$stats->{'avg'}, $stats->{'total-avg'});
$str .= sprintf('%.2f', $stats->{'idle'}) . '% idle, ';
$str .= sprintf('%.2f', $stats->{'busy'}) . '% busy, ';
}
$str .= sprintf('%.2f', $stats->{'runnable'}) . '% runnable, ';
$str .= sprintf('%.2f', $stats->{'queued'}) . '% queued, ';
$str .= sprintf('%.2f', $stats->{'wait'}) . '% wait';
if ($avg_delay_stats and not $veng) {
$str .= ', submit/execute/save-avg=(';
$str .= sprintf('%.2f/%.2f/%.2f)', $stats->{'submit'}, $stats->{'execute'}, $stats->{'save'});
}
say $str;
}
print "\t{id: 0, content: 'Freq'},\n" if $html;
print "\t{id: 1, content: 'GPU'},\n" if $gpu_timeline;
my $engine_start_id = $gpu_timeline ? 2 : 1;
foreach my $group (sort keys %rings) {
my $name;
my $ring = $ringmap{$rings{$group}};
my $id = $engine_start_id + $rings{$group};
my $elapsed = $last_ts - $first_ts;
my %stats;
$stats{'idle'} = (1.0 - $flat_busy{$ring} / $elapsed) * 100.0;
$stats{'busy'} = $running{$ring} / $elapsed * 100.0;
if (exists $runnable{$ring}) {
$stats{'runnable'} = $runnable{$ring} / $elapsed * 100.0;
} else {
$stats{'runnable'} = 0;
}
if (exists $queued{$ring}) {
$stats{'queued'} = $queued{$ring} / $elapsed * 100.0;
} else {
$stats{'queued'} = 0;
}
$reqw{$ring} = 0 unless exists $reqw{$ring};
$stats{'wait'} = $reqw{$ring} / $elapsed * 100.0;
$stats{'count'} = $batch_count{$ring};
$stats{'avg'} = $batch_avg{$ring};
$stats{'total-avg'} = $batch_total_avg{$ring};
$stats{'submit'} = $submit_avg{$ring};
$stats{'execute'} = $execute_avg{$ring};
$stats{'save'} = $ctxsave_avg{$ring};
if ($html) {
html_stats(\%stats, $group, $id);
} else {
stdio_stats(\%stats, $group, $id);
}
}
sub sortVQueue {
my $as = $vdb{$a}->{'queue'};
my $bs = $vdb{$b}->{'queue'};
my $val;
$val = $as <=> $bs;
$val = $a cmp $b if $val == 0;
return $val;
}
my @sorted_vkeys = sort sortVQueue keys %vdb;
my (%vqueued, %vrunnable);
foreach my $key (@sorted_vkeys) {
my $ctx = $vdb{$key}->{'ctx'};
$vdb{$key}->{'submit-delay'} = $vdb{$key}->{'submit'} - $vdb{$key}->{'queue'};
$vdb{$key}->{'execute-delay'} = $vdb{$key}->{'start'} - $vdb{$key}->{'submit'};
$vqueued{$ctx} += $vdb{$key}->{'submit-delay'};
$vrunnable{$ctx} += $vdb{$key}->{'execute-delay'};
}
my $veng_id = $engine_start_id + scalar(keys %rings);
foreach my $cid (sort keys %ctxmap) {
my $ctx = $ctxmap{$cid};
my $elapsed = $last_ts - $first_ts;
my %stats;
$stats{'virtual'} = 1;
if (exists $vrunnable{$ctx}) {
$stats{'runnable'} = $vrunnable{$ctx} / $elapsed * 100.0;
} else {
$stats{'runnable'} = 0;
}
if (exists $vqueued{$ctx}) {
$stats{'queued'} = $vqueued{$ctx} / $elapsed * 100.0;
} else {
$stats{'queued'} = 0;
}
$vreqw{$ctx} = 0 unless exists $vreqw{$ctx};
$stats{'wait'} = $vreqw{$ctx} / $elapsed * 100.0;
$stats{'count'} = scalar(grep {$ctx == $vdb{$_}->{'ctx'}} keys %vdb);
if ($html) {
html_stats(\%stats, $cid, $veng_id++);
} else {
stdio_stats(\%stats, $cid, $veng_id++);
}
}
exit 0 unless $html;
print <<ENDHTML;
]);
var items = new vis.DataSet([
ENDHTML
sub sortQueue {
my $as = $db{$a}->{'queue'};
my $bs = $db{$b}->{'queue'};
my $val;
$val = $as <=> $bs;
$val = $a cmp $b if $val == 0;
return $val;
}
sub ctx_colour
{
my ($ctx, $stage, $lfac) = (@_);
my ($s, $l);
my $val;
unless ($colour_contexts) {
if ($stage eq 'queue') {
$val = 210;
$s = 65;
$l = 52;
} elsif ($stage eq 'ready') {
$val = 0;
$s = 0;
$l = 47;
} elsif ($stage eq 'execute') {
$val = 346;
$s = 68;
$l = 65;
} elsif ($stage eq 'ctxsave') {
$val = 26;
$s = 90;
$l = 52;
} elsif ($stage eq 'incomplete') {
$val = 0;
$s = 85;
$l = 50;
}
} else {
if ($stage eq 'queue') {
$s = 35;
$l = 85;
} elsif ($stage eq 'ready') {
$s = 35;
$l = 45;
} elsif ($stage eq 'execute') {
$s = 80;
$l = 65;
} elsif ($stage eq 'ctxsave') {
$s = 75;
$l = 70;
} elsif ($stage eq 'incomplete') {
$s = 80;
$l = 25;
}
$val = $ctx_colours{$ctx};
}
$l = int($l * $lfac);
return "hsl($val, $s%, $l%)";
}
sub box_style
{
my ($ctx, $stage) = @_;
my $deg;
my $text_col = 'white';
if ($stage eq 'queue') {
$deg = 90;
$text_col = 'black' if $colour_contexts;
} elsif ($stage eq 'ready') {
$deg = 45;
} elsif ($stage eq 'execute') {
$deg = 0;
$text_col = 'black' if $colour_contexts;
} elsif ($stage eq 'ctxsave') {
$deg = 105;
$text_col = 'black' if $colour_contexts;
} elsif ($stage eq 'incomplete') {
$deg = 0;
}
return "color: $text_col; background: repeating-linear-gradient(" .
$deg . 'deg, ' .
ctx_colour($ctx, $stage, 1.0) . ', ' .
ctx_colour($ctx, $stage, 1.0) . ' 10px, ' .
ctx_colour($ctx, $stage, 0.90) . ' 10px, ' .
ctx_colour($ctx, $stage, 0.90) . ' 20px);';
}
my $i = 0;
my $req = 0;
foreach my $key (sort sortQueue keys %db) {
my ($name, $ctx, $seqno) = ($db{$key}->{'name'}, $db{$key}->{'ctx'}, $db{$key}->{'seqno'});
my ($queue, $start, $notify, $end) = ($db{$key}->{'queue'}, $db{$key}->{'start'}, $db{$key}->{'notify'}, $db{$key}->{'end'});
my $engine_start = $db{$key}->{'engine-start'};
my $submit = $queue + $db{$key}->{'submit-delay'};
my ($content, $style);
my $group = $engine_start_id + $rings{$db{$key}->{'ring'}};
my $subgroup = $ctx - $min_ctx;
my $type = ' type: \'range\',';
my $startend;
my $skey;
# submit to execute
unless (exists $skip_box{'queue'} or exists $db{$key}->{'virtual'}) {
$skey = 2 * $max_seqno * $ctx + 2 * $seqno;
$style = box_style($ctx, 'queue');
$content = "$name<br>$db{$key}->{'submit-delay'}us <small>($db{$key}->{'execute-delay'}us)</small>";
$startend = 'start: ' . $queue . ', end: ' . $submit;
print "\t{id: $i, key: $skey, $type group: $group, subgroup: $subgroup, subgroupOrder: $subgroup, content: '$content', $startend, style: \'$style\'},\n";
$i++;
}
# execute to start
$engine_start = $db{$key}->{'start'} unless defined $engine_start;
unless (exists $skip_box{'ready'} or exists $db{$key}->{'virtual'}) {
$skey = 2 * $max_seqno * $ctx + 2 * $seqno + 1;
$style = box_style($ctx, 'ready');
$content = "<small>$name<br>$db{$key}->{'execute-delay'}us</small>";
$startend = 'start: ' . $submit . ', end: ' . $engine_start;
print "\t{id: $i, key: $skey, $type group: $group, subgroup: $subgroup, subgroupOrder: $subgroup, content: '$content', $startend, style: \'$style\'},\n";
$i++;
}
# start to user interrupt
unless (exists $skip_box{'execute'}) {
$skey = -2 * $max_seqno * $ctx - 2 * $seqno - 1;
$style = box_style($ctx,
exists $db{$key}->{'incomplete'} ?
'incomplete' : 'execute');
$content = "$name <small>$db{$key}->{'port'}</small>";
$content .= ' <small><i>???</i></small> ' if exists $db{$key}->{'incomplete'};
$content .= ' <small><i>++</i></small> ' if exists $db{$key}->{'no-end'};
$content .= ' <small><i>+</i></small> ' if exists $db{$key}->{'no-notify'};
$content .= "<br>$db{$key}->{'duration'}us <small>($db{$key}->{'context-complete-delay'}us)</small>";
$startend = 'start: ' . $start . ', end: ' . $notify;
print "\t{id: $i, key: $skey, $type group: $group, subgroup: $subgroup, subgroupOrder: $subgroup, content: '$content', $startend, style: \'$style\'},\n";
$i++;
}
# user interrupt to context complete
unless (exists $skip_box{'ctxsave'} or exists $db{$key}->{'no-end'}) {
$skey = -2 * $max_seqno * $ctx - 2 * $seqno;
$style = box_style($ctx, 'ctxsave');
my $ctxsave = $db{$key}->{'end'} - $db{$key}->{'notify'};
$content = "<small>$name<br>${ctxsave}us</small>";
$content .= ' <small><i>???</i></small> ' if exists $db{$key}->{'incomplete'};
$content .= ' <small><i>++</i></small> ' if exists $db{$key}->{'no-end'};
$content .= ' <small><i>+</i></small> ' if exists $db{$key}->{'no-notify'};
$startend = 'start: ' . $notify . ', end: ' . $end;
print "\t{id: $i, key: $skey, $type group: $group, subgroup: $subgroup, subgroupOrder: $subgroup, content: '$content', $startend, style: \'$style\'},\n";
$i++;
}
$last_ts = $end;
last if ++$req > $max_requests;
}
push @freqs, [$prev_freq_ts, $last_ts, $prev_freq] if $prev_freq;
foreach my $item (@freqs) {
my ($start, $end, $freq) = @$item;
my $startend;
next if $start > $last_ts;
$start = $first_ts if $start < $first_ts;
$end = $last_ts if $end > $last_ts;
$startend = 'start: ' . $start . ', end: ' . $end;
print "\t{id: $i, type: 'range', group: 0, content: '$freq', $startend},\n";
$i++;
}
if ($gpu_timeline) {
foreach my $item (@gpu_intervals) {
my ($start, $end) = @$item;
my $startend;
next if $start > $last_ts;
$start = $first_ts if $start < $first_ts;
$end = $last_ts if $end > $last_ts;
$startend = 'start: ' . $start . ', end: ' . $end;
print "\t{id: $i, type: 'range', group: 1, $startend},\n";
$i++;
}
}
$req = 0;
$veng_id = $engine_start_id + scalar(keys %rings);
foreach my $key (@sorted_vkeys) {
my ($name, $ctx, $seqno) = ($vdb{$key}->{'name'}, $vdb{$key}->{'ctx'}, $vdb{$key}->{'seqno'});
my $queue = $vdb{$key}->{'queue'};
my $submit = $vdb{$key}->{'submit'};
my $engine_start = $db{$key}->{'engine-start'};
my ($content, $style, $startend, $skey);
my $group = $veng_id + $cids{$ctx};
my $subgroup = $ctx - $min_ctx;
my $type = ' type: \'range\',';
my $duration;
# submit to execute
unless (exists $skip_box{'queue'}) {
$skey = 2 * $max_seqno * $ctx + 2 * $seqno;
$style = box_style($ctx, 'queue');
$content = "$name<br>$vdb{$key}->{'submit-delay'}us <small>($vdb{$key}->{'execute-delay'}us)</small>";
$startend = 'start: ' . $queue . ', end: ' . $submit;
print "\t{id: $i, key: $skey, $type group: $group, subgroup: $subgroup, subgroupOrder: $subgroup, content: '$content', $startend, style: \'$style\'},\n";
$i++;
}
# execute to start
$engine_start = $vdb{$key}->{'start'} unless defined $engine_start;
unless (exists $skip_box{'ready'}) {
$skey = 2 * $max_seqno * $ctx + 2 * $seqno + 1;
$style = box_style($ctx, 'ready');
$content = "<small>$name<br>$vdb{$key}->{'execute-delay'}us</small>";
$startend = 'start: ' . $submit . ', end: ' . $engine_start;
print "\t{id: $i, key: $skey, $type group: $group, subgroup: $subgroup, subgroupOrder: $subgroup, content: '$content', $startend, style: \'$style\'},\n";
$i++;
}
last if ++$req > $max_requests;
}
my $end_ts = $first_ts + $width_us;
$first_ts = $first_ts;
print <<ENDHTML;
]);
function majorAxis(date, scale, step) {
var s = date / 1000000;
var precision;
if (scale == 'millisecond')
precision = 6;
else if (scale == 'second')
precision = 3;
else
precision = 0;
return s.toFixed(precision) + "s";
}
function minorAxis(date, scale, step) {
var t = date;
var precision;
var unit;
if (scale == 'millisecond') {
t %= 1000;
precision = 0;
unit = 'us';
} else if (scale == 'second') {
t /= 1000;
t %= 1000;
precision = 0;
unit = 'ms';
} else {
t /= 1000000;
precision = 1;
unit = 's';
}
return t.toFixed(precision) + unit;
}
// Configuration for the Timeline
var options = { groupOrder: 'content',
horizontalScroll: true,
stack: false,
stackSubgroups: false,
zoomKey: 'ctrlKey',
orientation: 'top',
format: { majorLabels: majorAxis, minorLabels: minorAxis },
start: $first_ts,
end: $end_ts};
// Create a Timeline
var timeline = new vis.Timeline(container, items, groups, options);
function toggleStacking() {
options.stack = !options.stack;
options.stackSubgroups = !options.stackSubgroups;
timeline.setOptions(options);
}
ENDHTML
print <<ENDHTML;
</script>
</body>
</html>
ENDHTML