You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1333 lines
33 KiB

/*
* Copyright © 2018-2019 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <sched.h>
#include "igt.h"
#include "igt_perf.h"
#include "i915/gem_ring.h"
#include "sw_sync.h"
/* Human-readable summary of what this test binary covers. */
IGT_TEST_DESCRIPTION("Exercise in-kernel load-balancing");
/*
 * Exclusive upper bound on the engine instance ids probed by list_engines(),
 * derived from the width of the PMU sample-instance field.
 */
#define INSTANCE_COUNT (1 << I915_PMU_SAMPLE_INSTANCE_BITS)
/*
 * Number of bytes needed for a struct i915_context_engines_load_balance
 * carrying @count entries in its engines[] member, i.e. the byte offset of
 * engines[count].
 */
static size_t sizeof_load_balance(int count)
{
	const size_t bytes =
		offsetof(struct i915_context_engines_load_balance, engines[count]);

	return bytes;
}
/*
 * Number of bytes needed for a struct i915_context_param_engines carrying
 * @count entries in its engines[] member, i.e. the byte offset of
 * engines[count].
 */
static size_t sizeof_param_engines(int count)
{
	const size_t bytes =
		offsetof(struct i915_context_param_engines, engines[count]);

	return bytes;
}
/*
 * Number of bytes needed for a struct i915_context_engines_bond carrying
 * @count entries in its engines[] member, i.e. the byte offset of
 * engines[count].
 */
static size_t sizeof_engines_bond(int count)
{
	const size_t bytes =
		offsetof(struct i915_context_engines_bond, engines[count]);

	return bytes;
}
/*
 * alloca0(sz): allocate @sz bytes with alloca() in the caller's frame, zero
 * them and yield the pointer. @sz is evaluated exactly once.
 */
#define alloca0(sz) ({ \
	size_t nbytes__ = (sz); \
	memset(alloca(nbytes__), 0, nbytes__); \
})
/*
 * Report whether the engine @class:@instance can be opened as a busyness PMU
 * counter. The probe fd is closed again before returning; @i915 is unused.
 */
static bool has_class_instance(int i915, uint16_t class, uint16_t instance)
{
	const int fd = perf_i915_open(I915_PMU_ENGINE_BUSY(class, instance));

	if (fd == -1)
		return false;

	close(fd);
	return true;
}
/*
 * Enumerate every engine whose class is selected by @class_mask.
 *
 * Bit N of @class_mask selects engine class I915_ENGINE_CLASS_RENDER + N; for
 * each selected class every instance below INSTANCE_COUNT is probed with
 * has_class_instance().
 *
 * Returns a malloc()ed array that the caller must free(), with the number of
 * entries stored in *@out. If nothing was found, returns NULL and stores 0.
 */
static struct i915_engine_class_instance *
list_engines(int i915, uint32_t class_mask, unsigned int *out)
{
	enum drm_i915_gem_engine_class class = I915_ENGINE_CLASS_RENDER;
	struct i915_engine_class_instance *engines;
	unsigned int capacity = 64;
	unsigned int nfound = 0;

	engines = malloc(capacity * sizeof(*engines));
	igt_assert(engines);

	while (class_mask) {
		if (class_mask & 1) {
			unsigned int instance;

			for (instance = 0; instance < INSTANCE_COUNT; instance++) {
				if (!has_class_instance(i915, class, instance))
					continue;

				/* Double the array whenever it fills up. */
				if (nfound == capacity) {
					capacity *= 2;
					engines = realloc(engines,
							  capacity * sizeof(*engines));
					igt_assert(engines);
				}

				engines[nfound++] = (struct i915_engine_class_instance){
					.engine_class = class,
					.engine_instance = instance,
				};
			}
		}

		class++;
		class_mask >>= 1;
	}

	if (nfound == 0) {
		free(engines);
		engines = NULL;
	}

	*out = nfound;
	return engines;
}
static int __set_engines(int i915, uint32_t ctx,
const struct i915_engine_class_instance *ci,
unsigned int count)
{
struct i915_context_param_engines *engines =
alloca0(sizeof_param_engines(count));
struct drm_i915_gem_context_param p = {
.ctx_id = ctx,
.param = I915_CONTEXT_PARAM_ENGINES,
.size = sizeof_param_engines(count),
.value = to_user_pointer(engines)
};
engines->extensions = 0;
memcpy(engines->engines, ci, count * sizeof(*ci));
return __gem_context_set_param(i915, &p);
}
/* As __set_engines(), but asserts that the engine map was accepted. */
static void set_engines(int i915, uint32_t ctx,
			const struct i915_engine_class_instance *ci,
			unsigned int count)
{
	igt_assert_eq(__set_engines(i915, ctx, ci, count), 0);
}
static int __set_load_balancer(int i915, uint32_t ctx,
const struct i915_engine_class_instance *ci,
unsigned int count,
void *ext)
{
struct i915_context_engines_load_balance *balancer =
alloca0(sizeof_load_balance(count));
struct i915_context_param_engines *engines =
alloca0(sizeof_param_engines(count + 1));
struct drm_i915_gem_context_param p = {
.ctx_id = ctx,
.param = I915_CONTEXT_PARAM_ENGINES,
.size = sizeof_param_engines(count + 1),
.value = to_user_pointer(engines)
};
balancer->base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE;
balancer->base.next_extension = to_user_pointer(ext);
igt_assert(count);
balancer->num_siblings = count;
memcpy(balancer->engines, ci, count * sizeof(*ci));
engines->extensions = to_user_pointer(balancer);
engines->engines[0].engine_class =
I915_ENGINE_CLASS_INVALID;
engines->engines[0].engine_instance =
I915_ENGINE_CLASS_INVALID_NONE;
memcpy(engines->engines + 1, ci, count * sizeof(*ci));
return __gem_context_set_param(i915, &p);
}
/* As __set_load_balancer(), but asserts that the kernel accepted the map. */
static void set_load_balancer(int i915, uint32_t ctx,
			      const struct i915_engine_class_instance *ci,
			      unsigned int count,
			      void *ext)
{
	igt_assert_eq(__set_load_balancer(i915, ctx, ci, count, ext), 0);
}
/*
 * Create a fresh context and give it a load-balancing engine map over the
 * @count engines in @ci (no extra extension). Returns the context id; the
 * caller destroys it.
 */
static uint32_t load_balancer_create(int i915,
				     const struct i915_engine_class_instance *ci,
				     unsigned int count)
{
	const uint32_t ctx = gem_context_create(i915);

	set_load_balancer(i915, ctx, ci, count, NULL);

	return ctx;
}
/*
 * Create a GEM object large enough to hold @offset + 4 bytes (rounded up to a
 * 4096-byte multiple) and write MI_BATCH_BUFFER_END at byte @offset.
 * Returns the object handle.
 */
static uint32_t __batch_create(int i915, uint32_t offset)
{
	const uint32_t bbe = MI_BATCH_BUFFER_END;
	const uint32_t handle = gem_create(i915, ALIGN(offset + 4, 4096));

	gem_write(i915, handle, offset, &bbe, sizeof(bbe));

	return handle;
}
/* Create a batch whose very first dword is MI_BATCH_BUFFER_END. */
static uint32_t batch_create(int i915)
{
	const uint32_t start = 0;

	return __batch_create(i915, start);
}
/*
 * For every engine class that has at least one engine, feed a series of
 * valid and deliberately malformed load-balance extensions to
 * I915_CONTEXT_PARAM_ENGINES and assert the expected result of each.
 */
static void invalid_balancer(int i915)
{
	I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(balancer, 64);
	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 64);
	struct drm_i915_gem_context_param p = {
		.param = I915_CONTEXT_PARAM_ENGINES,
		.value = to_user_pointer(&engines)
	};
	uint32_t handle;
	void *ptr;

	/*
	 * Assume that I915_CONTEXT_PARAM_ENGINE validates the array
	 * of engines[], our job is to determine if the load_balancer
	 * extension explodes.
	 */

	for (int class = 0; class < 32; class++) {
		struct i915_engine_class_instance *ci;
		unsigned int count;

		/* 1u: shifting a signed 1 into bit 31 would be undefined. */
		ci = list_engines(i915, 1u << class, &count);
		if (!ci)
			continue;

		/*
		 * NOTE(review): engines[] has 64 slots but up to count + 2
		 * slots are filled in below, so this bound looks two too
		 * generous -- confirm whether it should be tightened.
		 */
		igt_assert_lte(count, 64);

		p.ctx_id = gem_context_create(i915);
		p.size = (sizeof(struct i915_context_param_engines) +
			  (count + 1) * sizeof(*engines.engines));

		/* Baseline: placeholder in slot 0, real engines after it. */
		memset(&engines, 0, sizeof(engines));
		engines.engines[0].engine_class = I915_ENGINE_CLASS_INVALID;
		engines.engines[0].engine_instance = I915_ENGINE_CLASS_INVALID_NONE;
		memcpy(engines.engines + 1, ci, count * sizeof(*ci));
		gem_context_set_param(i915, &p);

		/* Unreadable extension pointers must fault. */
		engines.extensions = -1ull;
		igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);

		engines.extensions = 1ull;
		igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);

		/* A well-formed balancer on slot 0 is accepted. */
		memset(&balancer, 0, sizeof(balancer));
		balancer.base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE;
		balancer.num_siblings = count;
		memcpy(balancer.engines, ci, count * sizeof(*ci));

		engines.extensions = to_user_pointer(&balancer);
		gem_context_set_param(i915, &p);

		/* Slots 1..count already name real engines: expect -EEXIST. */
		balancer.engine_index = 1;
		igt_assert_eq(__gem_context_set_param(i915, &p), -EEXIST);

		balancer.engine_index = count;
		igt_assert_eq(__gem_context_set_param(i915, &p), -EEXIST);

		/* One past the end of the map: expect -EINVAL. */
		balancer.engine_index = count + 1;
		igt_assert_eq(__gem_context_set_param(i915, &p), -EINVAL);

		balancer.engine_index = 0;
		gem_context_set_param(i915, &p);

		/* Chaining the balancer to itself targets slot 0 twice. */
		balancer.base.next_extension = to_user_pointer(&balancer);
		igt_assert_eq(__gem_context_set_param(i915, &p), -EEXIST);

		balancer.base.next_extension = -1ull;
		igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);

		/*
		 * Three mapped pages whose individual pages are unmapped
		 * below, so that extensions placed near the page boundaries
		 * become unreadable part-way through the test.
		 */
		handle = gem_create(i915, 4096 * 3);
		ptr = gem_mmap__gtt(i915, handle, 4096 * 3, PROT_WRITE);
		gem_close(i915, handle);

		/* Two placeholder slots this time, then the real engines. */
		memset(&engines, 0, sizeof(engines));
		engines.engines[0].engine_class = I915_ENGINE_CLASS_INVALID;
		engines.engines[0].engine_instance = I915_ENGINE_CLASS_INVALID_NONE;
		engines.engines[1].engine_class = I915_ENGINE_CLASS_INVALID;
		engines.engines[1].engine_instance = I915_ENGINE_CLASS_INVALID_NONE;
		/*
		 * Copy count elements, not count pointers: sizeof(ci) here
		 * would double the length on 64-bit and run past both arrays.
		 */
		memcpy(engines.engines + 2, ci, count * sizeof(*ci));
		p.size = (sizeof(struct i915_context_param_engines) +
			  (count + 2) * sizeof(*engines.engines));
		gem_context_set_param(i915, &p);

		balancer.base.next_extension = 0;
		balancer.engine_index = 1;
		engines.extensions = to_user_pointer(&balancer);
		gem_context_set_param(i915, &p);

		/* Copies for slot 1, each starting 8 bytes before a page end. */
		memcpy(ptr + 4096 - 8, &balancer, sizeof(balancer));
		memcpy(ptr + 8192 - 8, &balancer, sizeof(balancer));
		balancer.engine_index = 0;

		engines.extensions = to_user_pointer(ptr) + 4096 - 8;
		gem_context_set_param(i915, &p);

		balancer.base.next_extension = engines.extensions;
		engines.extensions = to_user_pointer(&balancer);
		gem_context_set_param(i915, &p);

		/* With page 0 gone, the copy straddling pages 0/1 must fault. */
		munmap(ptr, 4096);
		igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
		engines.extensions = to_user_pointer(ptr) + 4096 - 8;
		igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);

		engines.extensions = to_user_pointer(ptr) + 8192 - 8;
		gem_context_set_param(i915, &p);

		balancer.base.next_extension = engines.extensions;
		engines.extensions = to_user_pointer(&balancer);
		gem_context_set_param(i915, &p);

		/* With page 2 gone, the copy straddling pages 1/2 must fault. */
		munmap(ptr + 8192, 4096);
		igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
		engines.extensions = to_user_pointer(ptr) + 8192 - 8;
		igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);

		munmap(ptr + 4096, 4096);

		gem_context_destroy(i915, p.ctx_id);
		free(ci);
	}
}
/*
 * Feed chains of bond extensions to I915_CONTEXT_PARAM_ENGINES on a context
 * with a single-slot engine map, and assert the expected result for valid
 * chains, looping chains and chains that run into unmapped memory.
 */
static void invalid_bonds(int i915)
{
	I915_DEFINE_CONTEXT_ENGINES_BOND(bonds[16], 1);
	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 1);
	struct drm_i915_gem_context_param p = {
		.ctx_id = gem_context_create(i915),
		.param = I915_CONTEXT_PARAM_ENGINES,
		.value = to_user_pointer(&engines),
		.size = sizeof(engines),
	};
	uint32_t handle;
	void *ptr;

	/* An all-zero engine map without extensions is accepted. */
	memset(&engines, 0, sizeof(engines));
	gem_context_set_param(i915, &p);

	/* Build a chain: bonds[n] -> bonds[n - 1] -> ... -> bonds[0] -> end. */
	memset(bonds, 0, sizeof(bonds));
	for (int n = 0; n < ARRAY_SIZE(bonds); n++) {
		bonds[n].base.name = I915_CONTEXT_ENGINES_EXT_BOND;
		bonds[n].base.next_extension =
			n ? to_user_pointer(&bonds[n - 1]) : 0;
		bonds[n].num_bonds = 1;
	}

	/* Passing the array's address starts the chain at bonds[0]. */
	engines.extensions = to_user_pointer(&bonds);
	gem_context_set_param(i915, &p);

	/* An unreadable next pointer must fault. */
	bonds[0].base.next_extension = -1ull;
	igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);

	/* A chain that loops back on itself is expected to give -E2BIG... */
	bonds[0].base.next_extension = to_user_pointer(&bonds[0]);
	igt_assert_eq(__gem_context_set_param(i915, &p), -E2BIG);

	/* ...including when entered from bonds[1]. */
	engines.extensions = to_user_pointer(&bonds[1]);
	igt_assert_eq(__gem_context_set_param(i915, &p), -E2BIG);

	bonds[0].base.next_extension = 0;
	gem_context_set_param(i915, &p);

	/* Three mapped pages; individual pages are unmapped further down. */
	handle = gem_create(i915, 4096 * 3);
	ptr = gem_mmap__gtt(i915, handle, 4096 * 3, PROT_WRITE);
	gem_close(i915, handle);

	/* A lone bond at the start of page 1. */
	memcpy(ptr + 4096, &bonds[0], sizeof(bonds[0]));
	engines.extensions = to_user_pointer(ptr) + 4096;
	gem_context_set_param(i915, &p);

	/* Page 1 -> page 0: fine while mapped, -EFAULT once page 0 is gone. */
	memcpy(ptr, &bonds[0], sizeof(bonds[0]));
	bonds[0].base.next_extension = to_user_pointer(ptr);
	memcpy(ptr + 4096, &bonds[0], sizeof(bonds[0]));
	gem_context_set_param(i915, &p);

	munmap(ptr, 4096);
	igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);

	/* Page 1 -> page 2: fine while mapped, -EFAULT once page 2 is gone. */
	bonds[0].base.next_extension = 0;
	memcpy(ptr + 8192, &bonds[0], sizeof(bonds[0]));
	bonds[0].base.next_extension = to_user_pointer(ptr) + 8192;
	memcpy(ptr + 4096, &bonds[0], sizeof(bonds[0]));
	gem_context_set_param(i915, &p);

	munmap(ptr + 8192, 4096);
	igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);

	/* Finally even the first extension becomes unreadable. */
	munmap(ptr + 4096, 4096);
	igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);

	gem_context_destroy(i915, p.ctx_id);
}
/* Sleep briefly before sampling so that pending kernel work can run first. */
static void kick_kthreads(void)
{
	const unsigned int settle_us = 20 * 1000;

	usleep(settle_us); /* 20ms should be enough for ksoftirqd! */
}
/*
 * Sample the counter behind @pmu twice, @period_us microseconds apart, and
 * return the increase of data[0] divided by the increase of data[1].
 * Presumably data[0] is busy time and data[1] the elapsed time reported by
 * perf -- confirm against perf_i915_open().
 */
static double measure_load(int pmu, int period_us)
{
	uint64_t data[2];
	uint64_t value0, time0;
	uint64_t busy, elapsed;

	kick_kthreads();

	igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
	value0 = data[0];
	time0 = data[1];

	usleep(period_us);

	igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
	busy = data[0] - value0;
	elapsed = data[1] - time0;

	return busy / (double)elapsed;
}
/*
 * Sample the @num counters of the perf group led by @pmu twice, @period_us
 * microseconds apart, and return the smallest per-counter load seen over
 * that interval (counter delta divided by the delta of data[1]).
 *
 * data[1] is used as the time base and data[2 + n] as counter n; presumably
 * this is perf's group read layout -- confirm against perf_i915_open_group().
 */
static double measure_min_load(int pmu, unsigned int num, int period_us)
{
	uint64_t data[2 + num];
	uint64_t busy[num];
	uint64_t elapsed;
	uint64_t lowest = -1, highest = 0;

	kick_kthreads();

	/* First sample: remember the starting values. */
	igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
	elapsed = data[1];
	for (unsigned int n = 0; n < num; n++)
		busy[n] = data[2 + n];

	usleep(period_us);

	/* Second sample: turn the stored values into deltas. */
	igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
	elapsed = data[1] - elapsed;

	for (unsigned int n = 0; n < num; n++) {
		busy[n] = data[2 + n] - busy[n];

		igt_debug("engine[%d]: %.1f%%\n",
			  n, busy[n] / (double)elapsed * 100);

		if (busy[n] < lowest)
			lowest = busy[n];
		if (busy[n] > highest)
			highest = busy[n];
	}

	igt_debug("elapsed: %"PRIu64"ns, load [%.1f, %.1f]%%\n",
		  elapsed,
		  lowest / (double)elapsed * 100,
		  highest / (double)elapsed * 100);

	return lowest / (double)elapsed;
}
/*
 * Sample the @num counters of the perf group led by @pmu twice, @period_us
 * microseconds apart, and store each counter's load over that interval in
 * @v[0..@num - 1]. @v must therefore have room for @num doubles.
 *
 * data[1] is used as the time base and data[2 + n] as counter n; presumably
 * this is perf's group read layout -- confirm against perf_i915_open_group().
 */
static void measure_all_load(int pmu, double *v, unsigned int num, int period_us)
{
	uint64_t data[2 + num];
	uint64_t busy[num];
	uint64_t elapsed;

	kick_kthreads();

	/* First sample: remember the starting values. */
	igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
	elapsed = data[1];
	for (unsigned int n = 0; n < num; n++)
		busy[n] = data[2 + n];

	usleep(period_us);

	/* Second sample: turn the stored values into deltas. */
	igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
	elapsed = data[1] - elapsed;

	for (unsigned int n = 0; n < num; n++) {
		busy[n] = data[2 + n] - busy[n];

		igt_debug("engine[%d]: %.1f%%\n",
			  n, busy[n] / (double)elapsed * 100);

		v[n] = busy[n] / (double)elapsed;
	}
}
/*
 * Open the busyness counter of engine @ci as a member of the perf group
 * @pmu (callers pass -1 to start a new group). Returns the value returned by
 * perf_i915_open_group().
 */
static int add_pmu(int pmu, const struct i915_engine_class_instance *ci)
{
	return perf_i915_open_group(
		I915_PMU_ENGINE_BUSY(ci->engine_class, ci->engine_instance),
		pmu);
}
/*
 * Short name for an engine class ("rcs", "bcs", "vcs", "vecs"), or "unk"
 * for any value outside the lookup table.
 */
static const char *class_to_str(int class)
{
	static const char * const names[] = {
		[I915_ENGINE_CLASS_RENDER] = "rcs",
		[I915_ENGINE_CLASS_COPY] = "bcs",
		[I915_ENGINE_CLASS_VIDEO] = "vcs",
		[I915_ENGINE_CLASS_VIDEO_ENHANCE] = "vecs",
	};

	return class < ARRAY_SIZE(names) ? names[class] : "unk";
}
/*
 * Run a spinner on slot @idx + 1 of @ctx's engine map and assert, using the
 * PMU busyness counter of ci[@idx], that this engine was more than 90% busy
 * while it ran.
 */
static void check_individual_engine(int i915,
				    uint32_t ctx,
				    const struct i915_engine_class_instance *ci,
				    int idx)
{
	const struct i915_engine_class_instance *engine = &ci[idx];
	igt_spin_t *spin;
	double load;
	int pmu;

	pmu = perf_i915_open(I915_PMU_ENGINE_BUSY(engine->engine_class,
						  engine->engine_instance));

	/* Slot 0 is the balancer placeholder, so ci[idx] lives at idx + 1. */
	spin = igt_spin_new(i915, .ctx = ctx, .engine = idx + 1);
	load = measure_load(pmu, 10000);
	igt_spin_free(i915, spin);

	close(pmu);

	igt_assert_f(load > 0.90,
		     "engine %d (class:instance %d:%d) was found to be only %.1f%% busy\n",
		     idx, ci[idx].engine_class, ci[idx].engine_instance, load*100);
}
/*
 * For each engine class, repeatedly shuffle the list of engines, install it
 * on one context and check that every map slot runs on the engine it names.
 */
static void individual(int i915)
{
	uint32_t ctx;

	/*
	 * I915_CONTEXT_PARAM_ENGINE allows us to index into the user
	 * supplied array from gem_execbuf(). Our check is to build the
	 * ctx->engine[] with various different engine classes, feed in
	 * a spinner and then ask pmu to confirm that the expected engine
	 * was busy.
	 */

	ctx = gem_context_create(i915);

	for (int class = 0; class < 32; class++) {
		struct i915_engine_class_instance *ci;
		unsigned int count;

		ci = list_engines(i915, 1u << class, &count);
		if (!ci)
			continue;

		for (int pass = 0; pass < count; pass++) { /* approx. count! */
			unsigned int slot;

			/* igt_exchange_int swaps ints, so entries must be int-sized. */
			igt_assert(sizeof(*ci) == sizeof(int));
			igt_permute_array(ci, count, igt_exchange_int);
			set_load_balancer(i915, ctx, ci, count, NULL);

			for (slot = 0; slot < count; slot++)
				check_individual_engine(i915, ctx, ci, slot);
		}

		free(ci);
	}

	gem_context_destroy(i915, ctx);
	gem_quiescent_gpu(i915);
}
/*
 * For each engine class with at least two engines, bond each sibling to a
 * master engine of another class, submit a master spinner plus a
 * submit-fenced request on the load-balanced context, and assert via the PMU
 * that exactly the bonded sibling and the master were busy.
 *
 * @flags: CORK additionally queues a corked spinner on the master slot ahead
 * of the real one and releases it only after the bonded request is submitted.
 */
static void bonded(int i915, unsigned int flags)
#define CORK 0x1
{
	I915_DEFINE_CONTEXT_ENGINES_BOND(bonds[16], 1);
	struct i915_engine_class_instance *master_engines;
	uint32_t master;

	/*
	 * I915_CONTEXT_PARAM_ENGINE provides an extension that allows us
	 * to specify which engine(s) to pair with a parallel (EXEC_SUBMIT)
	 * request submitted to another engine.
	 */

	master = gem_queue_create(i915);

	/* Pre-link the bonds: bonds[n] -> bonds[n - 1] -> ... -> bonds[0]. */
	memset(bonds, 0, sizeof(bonds));
	for (int n = 0; n < ARRAY_SIZE(bonds); n++) {
		bonds[n].base.name = I915_CONTEXT_ENGINES_EXT_BOND;
		bonds[n].base.next_extension =
			n ? to_user_pointer(&bonds[n - 1]) : 0;
		bonds[n].num_bonds = 1;
	}

	for (int class = 0; class < 32; class++) {
		struct i915_engine_class_instance *siblings;
		unsigned int count, limit, *order;
		uint32_t ctx;
		int n;

		siblings = list_engines(i915, 1u << class, &count);
		if (!siblings)
			continue;

		if (count < 2) {
			free(siblings);
			continue;
		}

		/* Masters are drawn from every other class. */
		master_engines = list_engines(i915, ~(1u << class), &limit);
		if (!master_engines) {
			/*
			 * Nothing to bond against; carrying on would index
			 * bonds[limit - 1] with limit == 0.
			 */
			free(siblings);
			continue;
		}
		set_engines(i915, master, master_engines, limit);

		limit = min(count, limit);
		igt_assert(limit <= ARRAY_SIZE(bonds));
		for (n = 0; n < limit; n++) {
			bonds[n].master = master_engines[n];
			bonds[n].engines[0] = siblings[n];
		}

		ctx = gem_context_clone(i915,
					master, I915_CONTEXT_CLONE_VM,
					I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
		/* Entering the chain at bonds[limit - 1] covers bonds 0..limit-1. */
		set_load_balancer(i915, ctx, siblings, count, &bonds[limit - 1]);

		/*
		 * 8 * limit bond indices to exercise: an ascending then
		 * descending sweep in the first quarter, and shuffled copies
		 * of that sweep in the remainder.
		 */
		order = malloc(sizeof(*order) * 8 * limit);
		igt_assert(order);
		for (n = 0; n < limit; n++)
			order[2 * limit - n - 1] = order[n] = n % limit;
		memcpy(order + 2 * limit, order, 2 * limit * sizeof(*order));
		memcpy(order + 4 * limit, order, 4 * limit * sizeof(*order));
		igt_permute_array(order + 2 * limit, 6 * limit, igt_exchange_int);

		for (n = 0; n < 8 * limit; n++) {
			struct drm_i915_gem_execbuffer2 eb;
			igt_spin_t *spin, *plug;
			IGT_CORK_HANDLE(cork);
			/*
			 * limit siblings plus the master: measure_all_load()
			 * below fills limit + 1 entries.
			 */
			double v[limit + 1];
			int pmu[limit + 1];
			int bond = order[n];

			/* One perf group: all siblings, then the master last. */
			pmu[0] = -1;
			for (int i = 0; i < limit; i++)
				pmu[i] = add_pmu(pmu[0], &siblings[i]);
			pmu[limit] = add_pmu(pmu[0], &master_engines[bond]);

			igt_assert(siblings[bond].engine_class !=
				   master_engines[bond].engine_class);

			plug = NULL;
			if (flags & CORK) {
				plug = __igt_spin_new(i915,
						      .ctx = master,
						      .engine = bond,
						      .dependency = igt_cork_plug(&cork, i915));
			}

			spin = __igt_spin_new(i915,
					      .ctx = master,
					      .engine = bond,
					      .flags = IGT_SPIN_FENCE_OUT);

			/* Resubmit the spinner on ctx, submit-fenced on the master. */
			eb = spin->execbuf;
			eb.rsvd1 = ctx;
			eb.rsvd2 = spin->out_fence;
			eb.flags = I915_EXEC_FENCE_SUBMIT;
			gem_execbuf(i915, &eb);

			if (plug) {
				igt_cork_unplug(&cork);
				igt_spin_free(i915, plug);
			}

			measure_all_load(pmu[0], v, limit + 1, 10000);
			igt_spin_free(i915, spin);

			igt_assert_f(v[bond] > 0.90,
				     "engine %d (class:instance %s:%d) was found to be only %.1f%% busy\n",
				     bond,
				     class_to_str(siblings[bond].engine_class),
				     siblings[bond].engine_instance,
				     100 * v[bond]);
			for (int other = 0; other < limit; other++) {
				if (other == bond)
					continue;

				igt_assert_f(v[other] == 0,
					     "engine %d (class:instance %s:%d) was not idle, and actually %.1f%% busy\n",
					     other,
					     class_to_str(siblings[other].engine_class),
					     siblings[other].engine_instance,
					     100 * v[other]);
			}
			igt_assert_f(v[limit] > 0.90,
				     "master (class:instance %s:%d) was found to be only %.1f%% busy\n",
				     class_to_str(master_engines[bond].engine_class),
				     master_engines[bond].engine_instance,
				     100 * v[limit]);

			/*
			 * Every group member owns its own fd; closing only the
			 * leader would leak the rest on each iteration.
			 */
			for (unsigned int i = limit + 1; i--; )
				close(pmu[i]);
		}

		free(order);
		gem_context_destroy(i915, ctx);
		free(master_engines);
		free(siblings);
	}

	gem_context_destroy(i915, master);
}
static void indices(int i915)
{
I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, I915_EXEC_RING_MASK + 1);
struct drm_i915_gem_context_param p = {
.ctx_id = gem_context_create(i915),
.param = I915_CONTEXT_PARAM_ENGINES,
.value = to_user_pointer(&engines)
};
struct drm_i915_gem_exec_object2 batch = {
.handle = batch_create(i915),
};
unsigned int nengines = 0;
void *balancers = NULL;
/*
* We can populate our engine map with multiple virtual engines.
* Do so.
*/
for (int class = 0; class < 32; class++) {
struct i915_engine_class_instance *ci;
unsigned int count;
ci = list_engines(i915, 1u << class, &count);
if (!ci)
continue;
for (int n = 0; n < count; n++) {
struct i915_context_engines_load_balance *balancer;
engines.engines[nengines].engine_class =
I915_ENGINE_CLASS_INVALID;
engines.engines[nengines].engine_instance =
I915_ENGINE_CLASS_INVALID_NONE;
balancer = calloc(sizeof_load_balance(count), 1);
igt_assert(balancer);
balancer->base.name =
I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE;
balancer->base.next_extension =
to_user_pointer(balancers);
balancers = balancer;
balancer->engine_index = nengines++;
balancer->num_siblings = count;
memcpy(balancer->engines,
ci, count * sizeof(*ci));
}
free(ci);
}
igt_require(balancers);
engines.extensions = to_user_pointer(balancers);
p.size = (sizeof(struct i915_engine_class_instance) * nengines +
sizeof(struct i915_context_param_engines));
gem_context_set_param(i915, &p);
for (unsigned int n = 0; n < nengines; n++) {
struct drm_i915_gem_execbuffer2 eb = {
.buffers_ptr = to_user_pointer(&batch),
.buffer_count = 1,
.flags = n,
.rsvd1 = p.ctx_id,
};
igt_debug("Executing on index=%d\n", n);
gem_execbuf(i915, &eb);
}
gem_context_destroy(i915, p.ctx_id);
gem_sync(i915, batch.handle);
gem_close(i915, batch.handle);
while (balancers) {
struct i915_context_engines_load_balance *b, *n;
b = balancers;
n = from_user_pointer(b->base.next_extension);
free(b);
balancers = n;
}
gem_quiescent_gpu(i915);
}
/*
 * For each of the first 16 engine classes that has engines, run one spinner
 * and queue a second (writing to a scratch object) on a load-balanced
 * context, then assert what DRM_IOCTL_I915_GEM_BUSY reports for the running
 * batch, the queued batch and the scratch object.
 */
static void busy(int i915)
{
	uint32_t scratch = gem_create(i915, 4096);

	/*
	 * Check that virtual engines are reported via GEM_BUSY.
	 *
	 * When running, the batch will be on the real engine and report
	 * the actual class.
	 *
	 * Prior to running, if the load-balancer is across multiple
	 * classes we don't know which engine the batch will
	 * execute on, so we report them all!
	 *
	 * However, as we only support (and test) creating a load-balancer
	 * from engines of only one class, that can be propagated accurately
	 * through to GEM_BUSY.
	 */

	for (int class = 0; class < 16; class++) {
		struct drm_i915_gem_busy busy;
		struct i915_engine_class_instance *ci;
		unsigned int count;
		igt_spin_t *spin[2];
		uint32_t ctx;

		ci = list_engines(i915, 1u << class, &count);
		if (!ci)
			continue;

		ctx = load_balancer_create(i915, ci, count);
		free(ci);

		/* spin[0] runs at once; spin[1] is queued behind it. */
		spin[0] = __igt_spin_new(i915,
					 .ctx = ctx,
					 .flags = IGT_SPIN_POLL_RUN);
		spin[1] = __igt_spin_new(i915,
					 .ctx = ctx,
					 .dependency = scratch);

		igt_spin_busywait_until_started(spin[0]);

		/* Running: actual class */
		busy.handle = spin[0]->handle;
		do_ioctl(i915, DRM_IOCTL_I915_GEM_BUSY, &busy);
		igt_assert_eq_u32(busy.busy, 1u << (class + 16));

		/* Queued(read): expected class */
		busy.handle = spin[1]->handle;
		do_ioctl(i915, DRM_IOCTL_I915_GEM_BUSY, &busy);
		igt_assert_eq_u32(busy.busy, 1u << (class + 16));

		/* Queued(write): expected class */
		busy.handle = scratch;
		do_ioctl(i915, DRM_IOCTL_I915_GEM_BUSY, &busy);
		igt_assert_eq_u32(busy.busy,
				  (1u << (class + 16)) | (class + 1));

		igt_spin_free(i915, spin[1]);
		igt_spin_free(i915, spin[0]);

		gem_context_destroy(i915, ctx);
	}

	gem_close(i915, scratch);
	gem_quiescent_gpu(i915);
}
/*
 * For each engine class, submit one spinner per engine -- each from its own
 * load-balanced context -- and assert via the PMU that even the least busy
 * engine of the class was more than 90% busy.
 *
 * @flags: PULSE submits a trivial batch before each spinner; LATE holds every
 * submission behind a cork fence that is released only once all are queued
 * (requires sw_sync).
 */
static void full(int i915, unsigned int flags)
#define PULSE 0x1
#define LATE 0x2
{
	struct drm_i915_gem_exec_object2 batch = {
		.handle = batch_create(i915),
	};

	if (flags & LATE)
		igt_require_sw_sync();

	/*
	 * I915_CONTEXT_PARAM_ENGINE changes the meaning of engine selector in
	 * execbuf to utilize our own map, into which we replace I915_EXEC_DEFAULT
	 * to provide an automatic selection from the other ctx->engine[]. It
	 * employs load-balancing to evenly distribute the workload across the
	 * array. If we submit N spinners, we expect them to be simultaneously
	 * running across N engines and use PMU to confirm that the entire
	 * set of engines are busy.
	 *
	 * We complicate matters by interspersing short-lived tasks to
	 * challenge the kernel to search for space in which to insert new
	 * batches.
	 */

	for (int class = 0; class < 32; class++) {
		struct i915_engine_class_instance *ci;
		igt_spin_t *spin = NULL;
		IGT_CORK_FENCE(cork);
		unsigned int count;
		double load;
		int fence = -1;
		int *pmu;

		ci = list_engines(i915, 1u << class, &count);
		if (!ci)
			continue;

		pmu = malloc(sizeof(*pmu) * count);
		igt_assert(pmu);

		if (flags & LATE)
			fence = igt_cork_plug(&cork, i915);

		/* pmu[0] starts at -1 so the first add_pmu() opens a new group. */
		pmu[0] = -1;
		for (unsigned int n = 0; n < count; n++) {
			uint32_t ctx;

			pmu[n] = add_pmu(pmu[0], &ci[n]);

			if (flags & PULSE) {
				struct drm_i915_gem_execbuffer2 eb = {
					.buffers_ptr = to_user_pointer(&batch),
					.buffer_count = 1,
					.rsvd2 = fence,
					.flags = flags & LATE ? I915_EXEC_FENCE_IN : 0,
				};
				gem_execbuf(i915, &eb);
			}

			/*
			 * Each spinner needs to be on a new timeline,
			 * otherwise they will just sit in the single queue
			 * and not run concurrently.
			 */
			ctx = load_balancer_create(i915, ci, count);

			if (spin == NULL) {
				spin = __igt_spin_new(i915, .ctx = ctx);
			} else {
				/* Resubmit the first spinner's batch on the new context. */
				struct drm_i915_gem_execbuffer2 eb = {
					.buffers_ptr = spin->execbuf.buffers_ptr,
					.buffer_count = spin->execbuf.buffer_count,
					.rsvd1 = ctx,
					.rsvd2 = fence,
					.flags = flags & LATE ? I915_EXEC_FENCE_IN : 0,
				};
				gem_execbuf(i915, &eb);
			}

			gem_context_destroy(i915, ctx);
		}

		if (flags & LATE) {
			igt_cork_unplug(&cork);
			close(fence);
		}

		load = measure_min_load(pmu[0], count, 10000);
		igt_spin_free(i915, spin);

		/*
		 * Every group member owns its own fd; closing only the leader
		 * would leak count - 1 descriptors per class.
		 */
		for (unsigned int n = count; n--; )
			close(pmu[n]);
		free(pmu);

		free(ci);

		igt_assert_f(load > 0.90,
			     "minimum load for %d x class:%d was found to be only %.1f%% busy\n",
			     count, class, load*100);
		gem_quiescent_gpu(i915);
	}

	gem_close(i915, batch.handle);
	gem_quiescent_gpu(i915);
}
/*
 * Submit @execbuf in bursts of 1024 until igt_seconds_elapsed() reports at
 * least 2 seconds, wait for @handle to idle, and return the average time per
 * submission in microseconds.
 */
static double nop_latency_us(int i915,
			     struct drm_i915_gem_execbuffer2 *execbuf,
			     uint32_t handle)
{
	struct timespec tv = {};
	unsigned long nops = 0;

	igt_nsec_elapsed(&tv);
	do {
		for (int r = 0; r < 1024; r++)
			gem_execbuf(i915, execbuf);
		nops += 1024;
	} while (igt_seconds_elapsed(&tv) < 2);
	gem_sync(i915, handle);

	return igt_nsec_elapsed(&tv) * 1e-3 / nops;
}

/*
 * Report the submission latency of a trivial batch for every engine class:
 * first on each individual map slot, then on slot 0 (the balanced slot), and
 * finally from one forked child per engine, each submitting through its own
 * clone of the context -- to its own slot and then to slot 0.
 */
static void nop(int i915)
{
	struct drm_i915_gem_exec_object2 batch = {
		.handle = batch_create(i915),
	};

	for (int class = 0; class < 32; class++) {
		struct i915_engine_class_instance *ci;
		unsigned int count;
		uint32_t ctx;

		ci = list_engines(i915, 1u << class, &count);
		if (!ci)
			continue;

		ctx = load_balancer_create(i915, ci, count);

		/* Each physical engine on its own: slots 1..count. */
		for (int n = 0; n < count; n++) {
			struct drm_i915_gem_execbuffer2 execbuf = {
				.buffers_ptr = to_user_pointer(&batch),
				.buffer_count = 1,
				.flags = n + 1,
				.rsvd1 = ctx,
			};
			double t;

			t = nop_latency_us(i915, &execbuf, batch.handle);
			igt_info("%s:%d %.3fus\n", class_to_str(class), n, t);
		}

		/* The balanced slot 0. */
		{
			struct drm_i915_gem_execbuffer2 execbuf = {
				.buffers_ptr = to_user_pointer(&batch),
				.buffer_count = 1,
				.rsvd1 = ctx,
			};
			double t;

			t = nop_latency_us(i915, &execbuf, batch.handle);
			igt_info("%s:* %.3fus\n", class_to_str(class), t);
		}

		/* All engines at once, one child process per engine. */
		igt_fork(child, count) {
			struct drm_i915_gem_execbuffer2 execbuf = {
				.buffers_ptr = to_user_pointer(&batch),
				.buffer_count = 1,
				.flags = child + 1,
				.rsvd1 = gem_context_clone(i915, ctx,
							   I915_CONTEXT_CLONE_ENGINES, 0),
			};
			double t;

			t = nop_latency_us(i915, &execbuf, batch.handle);
			igt_info("[%d] %s:%d %.3fus\n",
				 child, class_to_str(class), child, t);

			execbuf.flags = 0;

			t = nop_latency_us(i915, &execbuf, batch.handle);
			igt_info("[%d] %s:* %.3fus\n",
				 child, class_to_str(class), t);

			gem_context_destroy(i915, execbuf.rsvd1);
		}

		igt_waitchildren();

		gem_context_destroy(i915, ctx);
		free(ci);
	}

	gem_close(i915, batch.handle);
	gem_quiescent_gpu(i915);
}
/*
 * Execute a freshly created trivial batch on slot @engine of @ctx, wait for
 * it to complete, and release the batch again.
 */
static void ping(int i915, uint32_t ctx, unsigned int engine)
{
	struct drm_i915_gem_exec_object2 obj = {
		.handle = batch_create(i915),
	};
	struct drm_i915_gem_execbuffer2 execbuf = {
		.buffers_ptr = to_user_pointer(&obj),
		.buffer_count = 1,
		.flags = engine,
		.rsvd1 = ctx,
	};

	gem_execbuf(i915, &execbuf);
	gem_sync(i915, obj.handle);

	gem_close(i915, obj.handle);
}
/*
 * For each engine class from 1 upwards that has at least two engines, queue
 * spinners that depend on a still-busy scratch object on two load-balanced
 * contexts, then check that a third context can still get a trivial batch
 * executed (ping() would never return otherwise). Requires preemption.
 */
static void semaphore(int i915)
{
	uint32_t block[2], scratch;
	igt_spin_t *spin[3];

	/*
	 * If we are using HW semaphores to launch serialised requests
	 * on different engines concurrently, we want to verify that real
	 * work is unimpeded.
	 */
	igt_require(gem_scheduler_has_preemption(i915));

	block[0] = gem_context_create(i915);
	block[1] = gem_context_create(i915);

	/* spin[2] keeps scratch busy for the whole test. */
	scratch = gem_create(i915, 4096);
	spin[2] = igt_spin_new(i915, .dependency = scratch);

	/* NOTE(review): class 0 is skipped, presumably because spin[2] runs there -- confirm. */
	for (int class = 1; class < 32; class++) {
		struct i915_engine_class_instance *ci;
		unsigned int count;
		uint32_t vip;

		ci = list_engines(i915, 1u << class, &count);
		if (!ci)
			continue;

		if (count < ARRAY_SIZE(block)) {
			/* Too few engines to test; don't leak the list. */
			free(ci);
			continue;
		}

		/* Ensure that we completely occupy all engines in this group */
		count = ARRAY_SIZE(block);

		for (int i = 0; i < count; i++) {
			set_load_balancer(i915, block[i], ci, count, NULL);
			spin[i] = __igt_spin_new(i915,
						 .ctx = block[i],
						 .dependency = scratch);
		}

		/*
		 * Either we haven't blocked both engines with semaphores,
		 * or we let the vip through. If not, we hang.
		 */
		vip = gem_context_create(i915);
		set_load_balancer(i915, vip, ci, count, NULL);
		ping(i915, vip, 0);
		gem_context_destroy(i915, vip);

		for (int i = 0; i < count; i++)
			igt_spin_free(i915, spin[i]);

		free(ci);
	}

	igt_spin_free(i915, spin[2]);
	gem_close(i915, scratch);

	gem_context_destroy(i915, block[1]);
	gem_context_destroy(i915, block[0]);

	gem_quiescent_gpu(i915);
}
/*
 * Stress test: create 128 load-balanced contexts for every engine class with
 * at least two engines, then for @timeout seconds keep submitting a random
 * number of corked batches across them, release the cork and wait for all
 * of them to complete. Requires sw_sync and at least one such class.
 */
static void smoketest(int i915, int timeout)
{
	struct drm_i915_gem_exec_object2 batch[2] = {
		{ .handle = __batch_create(i915, 16380) }
	};
	unsigned int ncontext = 0;
	uint32_t *contexts = NULL;
	uint32_t *handles = NULL;

	igt_require_sw_sync();

	for (int class = 0; class < 32; class++) {
		struct i915_engine_class_instance *ci;
		unsigned int count = 0;
		unsigned int first;

		ci = list_engines(i915, 1u << class, &count);
		if (!ci || count < 2) {
			free(ci);
			continue;
		}

		/* Append another 128 contexts for this class. */
		first = ncontext;
		ncontext += 128;
		contexts = realloc(contexts, sizeof(*contexts) * ncontext);
		igt_assert(contexts);

		for (unsigned int n = first; n < ncontext; n++) {
			contexts[n] = load_balancer_create(i915, ci, count);
			igt_assert(contexts[n]);
		}

		free(ci);
	}
	igt_debug("Created %d virtual engines (one per context)\n", ncontext);
	igt_require(ncontext);

	/* Quadruple the list by duplicating it twice over. */
	contexts = realloc(contexts, sizeof(*contexts) * ncontext * 4);
	igt_assert(contexts);
	memcpy(contexts + ncontext, contexts, ncontext * sizeof(*contexts));
	ncontext *= 2;
	memcpy(contexts + ncontext, contexts, ncontext * sizeof(*contexts));
	ncontext *= 2;

	/* One extra object per (duplicated) context entry. */
	handles = malloc(sizeof(*handles) * ncontext);
	igt_assert(handles);
	for (unsigned int n = 0; n < ncontext; n++)
		handles[n] = gem_create(i915, 4096);

	igt_until_timeout(timeout) {
		unsigned int count = 1 + (rand() % (ncontext - 1));
		IGT_CORK_FENCE(cork);
		int fence = igt_cork_plug(&cork, i915);

		/* Queue everything behind the cork... */
		for (unsigned int n = 0; n < count; n++) {
			struct drm_i915_gem_execbuffer2 eb = {
				.buffers_ptr = to_user_pointer(batch),
				.buffer_count = ARRAY_SIZE(batch),
				.rsvd1 = contexts[n],
				.rsvd2 = fence,
				.flags = I915_EXEC_BATCH_FIRST | I915_EXEC_FENCE_IN,
			};
			batch[1].handle = handles[n];
			gem_execbuf(i915, &eb);
		}
		igt_permute_array(handles, count, igt_exchange_int);

		/* ...then release it and wait for each object in shuffled order. */
		igt_cork_unplug(&cork);
		for (unsigned int n = 0; n < count; n++)
			gem_sync(i915, handles[n]);

		close(fence);
	}

	/*
	 * Each context id appears four times in contexts[], hence the
	 * non-asserting destroy.
	 */
	for (unsigned int n = 0; n < ncontext; n++) {
		gem_close(i915, handles[n]);
		__gem_context_destroy(i915, contexts[n]);
	}
	free(handles);
	free(contexts);
	gem_close(i915, batch[0].handle);
}
static bool has_context_engines(int i915)
{
struct drm_i915_gem_context_param p = {
.param = I915_CONTEXT_PARAM_ENGINES,
};
return __gem_context_set_param(i915, &p) == 0;
}
/*
 * Report whether a load-balancing engine map over a single zero-initialised
 * class:instance is accepted on a temporary context.
 */
static bool has_load_balancer(int i915)
{
	struct i915_engine_class_instance ci = {};
	const uint32_t ctx = gem_context_create(i915);
	const int err = __set_load_balancer(i915, ctx, &ci, 1, NULL);

	gem_context_destroy(i915, ctx);

	return err == 0;
}
/*
 * Test entry point: open the i915 device, check the requirements shared by
 * all subtests, and declare each subtest in turn.
 */
igt_main
{
/* Device fd shared by every subtest; opened in the first fixture. */
int i915 = -1;
igt_skip_on_simulation();
/*
 * Setup: requires GEM, context support, I915_CONTEXT_PARAM_ENGINES and the
 * load-balance extension, then starts the hang detector.
 */
igt_fixture {
i915 = drm_open_driver(DRIVER_INTEL);
igt_require_gem(i915);
gem_require_contexts(i915);
igt_require(has_context_engines(i915));
igt_require(has_load_balancer(i915));
igt_fork_hang_detector(i915);
}
/* Validation of malformed extensions. */
igt_subtest("invalid-balancer")
invalid_balancer(i915);
igt_subtest("invalid-bonds")
invalid_bonds(i915);
/* Engine-map indexing and GEM_BUSY reporting. */
igt_subtest("individual")
individual(i915);
igt_subtest("indices")
indices(i915);
igt_subtest("busy")
busy(i915);
/* full, full-pulse, full-late and full-late-pulse: one subtest per flag combination. */
igt_subtest_group {
static const struct {
const char *name;
unsigned int flags;
} phases[] = {
{ "", 0 },
{ "-pulse", PULSE },
{ "-late", LATE },
{ "-late-pulse", PULSE | LATE },
{ }
};
/* The table ends at the entry whose name is NULL. */
for (typeof(*phases) *p = phases; p->name; p++)
igt_subtest_f("full%s", p->name)
full(i915, p->flags);
}
igt_subtest("nop")
nop(i915);
igt_subtest("semaphore")
semaphore(i915);
/* The smoke test runs its submission loop for 20 seconds. */
igt_subtest("smoke")
smoketest(i915, 20);
/* Bonded submission, without and with a cork ahead of the master. */
igt_subtest("bonded-imm")
bonded(i915, 0);
igt_subtest("bonded-cork")
bonded(i915, CORK);
/* Teardown: stop the hang detector started in the setup fixture. */
igt_fixture {
igt_stop_hang_detector();
}
}